summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r--contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp720
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h50
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td699
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp1906
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h136
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp4631
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h509
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp810
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h191
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.h288
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td246
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp2281
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp261
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h256
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp1413
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp3070
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFeatures.h97
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp2168
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h85
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp102
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h49
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp3954
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp12326
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h677
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td2372
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp134
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.h48
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td5756
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td8178
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td1518
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td4765
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td1894
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp2338
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp163
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h226
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp99
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h6591
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h31
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td430
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSchedule.td354
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA8.td1075
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA9.td2529
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td1046
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleV6.td300
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp271
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h73
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp366
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h488
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp431
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.h130
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp61
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h43
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp495
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h127
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp10034
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp5223
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp1647
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h229
-rwxr-xr-xcontrib/llvm/lib/Target/ARM/LICENSE.TXT47
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h713
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp1089
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h79
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h38
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h28
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h26
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h464
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp259
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp1377
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h110
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp115
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h56
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp1695
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp41
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h79
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp344
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h123
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp43
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp483
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp74
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp196
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h93
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp91
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp47
-rw-r--r--contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp400
-rw-r--r--contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp28
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp659
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h88
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp128
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h63
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp299
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp636
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h78
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp1054
-rw-r--r--contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp623
-rw-r--r--contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h65
97 files changed, 108263 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
new file mode 100644
index 0000000..7a1865c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -0,0 +1,720 @@
+//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Cortex-A15 processor employs a tracking scheme in its register renaming
+// in order to process each instruction's micro-ops speculatively and
+// out-of-order with appropriate forwarding. The ARM architecture allows VFP
+// instructions to read and write 32-bit S-registers. Each S-register
+// corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
+//
+// There are several instruction patterns which can be used to provide this
+// capability which can provide higher performance than other, potentially more
+// direct patterns, specifically around when one micro-op reads a D-register
+// operand that has recently been written as one or more S-register results.
+//
+// This file defines a pre-regalloc pass which looks for SPR producers which
+// are going to be used by a DPR (or QPR) consumers and creates the more
+// optimized access pattern.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <map>
+#include <set>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "a15-sd-optimizer"
+
+namespace {
+ struct A15SDOptimizer : public MachineFunctionPass {
+ static char ID;
+ A15SDOptimizer() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ const char *getPassName() const override {
+ return "ARM A15 S->D optimizer";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ bool runOnInstruction(MachineInstr *MI);
+
+ //
+ // Instruction builder helpers
+ //
+ unsigned createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane,
+ bool QPR=false);
+
+ unsigned createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC);
+
+ unsigned createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1);
+
+ unsigned createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2);
+
+ unsigned createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert);
+
+ unsigned createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL);
+
+ //
+ // Various property checkers
+ //
+ bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
+ bool hasPartialWrite(MachineInstr *MI);
+ SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
+ unsigned getDPRLaneFromSPR(unsigned SReg);
+
+ //
+ // Methods used for getting the definitions of partial registers
+ //
+
+ MachineInstr *elideCopies(MachineInstr *MI);
+ void elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs);
+
+ //
+ // Pattern optimization methods
+ //
+ unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
+ unsigned optimizeSDPattern(MachineInstr *MI);
+ unsigned getPrefSPRLane(unsigned SReg);
+
+ //
+ // Sanitizing method - used to make sure if don't leave dead code around.
+ //
+ void eraseInstrWithNoUses(MachineInstr *MI);
+
+ //
+ // A map used to track the changes done by this pass.
+ //
+ std::map<MachineInstr*, unsigned> Replacements;
+ std::set<MachineInstr *> DeadInstr;
+ };
+ char A15SDOptimizer::ID = 0;
+} // end anonymous namespace
+
+// Returns true if this is a use of a SPR register.
+bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
+ const TargetRegisterClass *TRC) {
+ if (!MO.isReg())
+ return false;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
+ else
+ return TRC->contains(Reg);
+}
+
+unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
+ unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
+ &ARM::DPRRegClass);
+ if (DReg != ARM::NoRegister) return ARM::ssub_1;
+ return ARM::ssub_0;
+}
+
+// Get the subreg type that is most likely to be coalesced
+// for an SPR register that will be used in VDUP32d pseudo.
+unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
+ if (!TRI->isVirtualRegister(SReg))
+ return getDPRLaneFromSPR(SReg);
+
+ MachineInstr *MI = MRI->getVRegDef(SReg);
+ if (!MI) return ARM::ssub_0;
+ MachineOperand *MO = MI->findRegisterDefOperand(SReg);
+
+ assert(MO->isReg() && "Non-register operand found!");
+ if (!MO) return ARM::ssub_0;
+
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ SReg = MI->getOperand(1).getReg();
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(SReg)) {
+ if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
+ return ARM::ssub_0;
+ }
+ return getDPRLaneFromSPR(SReg);
+}
+
+// MI is known to be dead. Figure out what instructions
+// are also made dead by this and mark them for removal.
+void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
+ SmallVector<MachineInstr *, 8> Front;
+ DeadInstr.insert(MI);
+
+ DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
+ Front.push_back(MI);
+
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // MI is already known to be dead. We need to see
+ // if other instructions can also be removed.
+ for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if ((!MO.isReg()) || (!MO.isUse()))
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TRI->isVirtualRegister(Reg))
+ continue;
+ MachineOperand *Op = MI->findRegisterDefOperand(Reg);
+
+ if (!Op)
+ continue;
+
+ MachineInstr *Def = Op->getParent();
+
+ // We don't need to do anything if we have already marked
+ // this instruction as being dead.
+ if (DeadInstr.find(Def) != DeadInstr.end())
+ continue;
+
+ // Check if all the uses of this instruction are marked as
+ // dead. If so, we can also mark this instruction as being
+ // dead.
+ bool IsDead = true;
+ for (unsigned int j = 0; j < Def->getNumOperands(); ++j) {
+ MachineOperand &MODef = Def->getOperand(j);
+ if ((!MODef.isReg()) || (!MODef.isDef()))
+ continue;
+ unsigned DefReg = MODef.getReg();
+ if (!TRI->isVirtualRegister(DefReg)) {
+ IsDead = false;
+ break;
+ }
+ for (MachineRegisterInfo::use_instr_iterator
+ II = MRI->use_instr_begin(Reg), EE = MRI->use_instr_end();
+ II != EE; ++II) {
+ // We don't care about self references.
+ if (&*II == Def)
+ continue;
+ if (DeadInstr.find(&*II) == DeadInstr.end()) {
+ IsDead = false;
+ break;
+ }
+ }
+ }
+
+ if (!IsDead) continue;
+
+ DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
+ DeadInstr.insert(Def);
+ }
+ }
+}
+
+// Creates the more optimized patterns and generally does all the code
+// transformations in this pass.
+unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
+ if (MI->isCopy()) {
+ return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
+ }
+
+ if (MI->isInsertSubreg()) {
+ unsigned DPRReg = MI->getOperand(1).getReg();
+ unsigned SPRReg = MI->getOperand(2).getReg();
+
+ if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
+ MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
+
+ if (DPRMI && SPRMI) {
+ // See if the first operand of this insert_subreg is IMPLICIT_DEF
+ MachineInstr *ECDef = elideCopies(DPRMI);
+ if (ECDef && ECDef->isImplicitDef()) {
+ // Another corner case - if we're inserting something that is purely
+ // a subreg copy of a DPR, just use that DPR.
+
+ MachineInstr *EC = elideCopies(SPRMI);
+ // Is it a subreg copy of ssub_0?
+ if (EC && EC->isCopy() &&
+ EC->getOperand(1).getSubReg() == ARM::ssub_0) {
+ DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
+
+ // Find the thing we're subreg copying out of - is it of the same
+ // regclass as DPRMI? (i.e. a DPR or QPR).
+ unsigned FullReg = SPRMI->getOperand(1).getReg();
+ const TargetRegisterClass *TRC =
+ MRI->getRegClass(MI->getOperand(1).getReg());
+ if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
+ DEBUG(dbgs() << "Subreg copy is compatible - returning ");
+ DEBUG(dbgs() << PrintReg(FullReg) << "\n");
+ eraseInstrWithNoUses(MI);
+ return FullReg;
+ }
+ }
+
+ return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
+ }
+ }
+ }
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ // See if all bar one of the operands are IMPLICIT_DEF and insert the
+ // optimizer pattern accordingly.
+ unsigned NumImplicit = 0, NumTotal = 0;
+ unsigned NonImplicitReg = ~0U;
+
+ for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
+ if (!MI->getOperand(I).isReg())
+ continue;
+ ++NumTotal;
+ unsigned OpReg = MI->getOperand(I).getReg();
+
+ if (!TRI->isVirtualRegister(OpReg))
+ break;
+
+ MachineInstr *Def = MRI->getVRegDef(OpReg);
+ if (!Def)
+ break;
+ if (Def->isImplicitDef())
+ ++NumImplicit;
+ else
+ NonImplicitReg = MI->getOperand(I).getReg();
+ }
+
+ if (NumImplicit == NumTotal - 1)
+ return optimizeAllLanesPattern(MI, NonImplicitReg);
+ else
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ llvm_unreachable("Unhandled update pattern!");
+}
+
+// Return true if this MachineInstr inserts a scalar (SPR) value into
+// a D or Q register.
+bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
+ // The only way we can do a partial register update is through a COPY,
+ // INSERT_SUBREG or REG_SEQUENCE.
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
+ &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ return false;
+}
+
+// Looks through full copies to get the instruction that defines the input
+// operand for MI.
+MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
+ if (!MI->isFullCopy())
+ return MI;
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ return nullptr;
+ MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!Def)
+ return nullptr;
+ return elideCopies(Def);
+}
+
+// Look through full copies and PHIs to get the set of non-copy MachineInstrs
+// that can produce MI.
+void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs) {
+ // Looking through PHIs may create loops so we need to track what
+ // instructions we have visited before.
+ std::set<MachineInstr *> Reached;
+ SmallVector<MachineInstr *, 8> Front;
+ Front.push_back(MI);
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // If we have already explored this MachineInstr, ignore it.
+ if (Reached.find(MI) != Reached.end())
+ continue;
+ Reached.insert(MI);
+ if (MI->isPHI()) {
+ for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
+ unsigned Reg = MI->getOperand(I).getReg();
+ if (!TRI->isVirtualRegister(Reg)) {
+ continue;
+ }
+ MachineInstr *NewMI = MRI->getVRegDef(Reg);
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ }
+ } else if (MI->isFullCopy()) {
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ continue;
+ MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ } else {
+ DEBUG(dbgs() << "Found partial copy" << *MI <<"\n");
+ Outs.push_back(MI);
+ }
+ }
+}
+
+// Return the DPR virtual registers that are read by this machine instruction
+// (if any).
+SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
+ if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
+ MI->isKill())
+ return SmallVector<unsigned, 8>();
+
+ SmallVector<unsigned, 8> Defs;
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ if (!usesRegClass(MO, &ARM::DPRRegClass) &&
+ !usesRegClass(MO, &ARM::QPRRegClass) &&
+ !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR
+ continue;
+
+ Defs.push_back(MO.getReg());
+ }
+ return Defs;
+}
+
+// Creates a DPR register from an SPR one by using a VDUP.
+unsigned
+A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane, bool QPR) {
+ unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
+ &ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d),
+ Out)
+ .addReg(Reg)
+ .addImm(Lane));
+
+ return Out;
+}
+
+// Creates a SPR register from a DPR by copying the value in lane 0.
+unsigned
+A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC) {
+ unsigned Out = MRI->createVirtualRegister(TRC);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::COPY), Out)
+ .addReg(DReg, 0, Lane);
+
+ return Out;
+}
+
+// Takes two SPR registers and creates a DPR by using a REG_SEQUENCE.
+unsigned
+A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::REG_SEQUENCE), Out)
+ .addReg(Reg1)
+ .addImm(ARM::dsub_0)
+ .addReg(Reg2)
+ .addImm(ARM::dsub_1);
+ return Out;
+}
+
+// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
+// and merges them into one DPR register.
+unsigned
+A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(ARM::VEXTd32), Out)
+ .addReg(Ssub0)
+ .addReg(Ssub1)
+ .addImm(1));
+ return Out;
+}
+
+unsigned
+A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::INSERT_SUBREG), Out)
+ .addReg(DReg)
+ .addReg(ToInsert)
+ .addImm(Lane);
+
+ return Out;
+}
+
+unsigned
+A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::IMPLICIT_DEF), Out);
+ return Out;
+}
+
+// This function inserts instructions in order to optimize interactions between
+// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
+// lanes, and the using VEXT instructions to recompose the result.
+unsigned
+A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
+ MachineBasicBlock::iterator InsertPt(MI);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock &MBB = *MI->getParent();
+ InsertPt++;
+ unsigned Out;
+
+ // DPair has the same length as QPR and also has two DPRs as subreg.
+ // Treat DPair as QPR.
+ if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
+ MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
+ unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_0, &ARM::DPRRegClass);
+ unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_1, &ARM::DPRRegClass);
+
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
+ unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
+ Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
+
+ Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
+
+ } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ } else {
+ assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
+ "Found unexpected regclass!");
+
+ unsigned PrefLane = getPrefSPRLane(Reg);
+ unsigned Lane;
+ switch (PrefLane) {
+ case ARM::ssub_0: Lane = 0; break;
+ case ARM::ssub_1: Lane = 1; break;
+ default: llvm_unreachable("Unknown preferred lane!");
+ }
+
+ // Treat DPair as QPR
+ bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) ||
+ usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);
+
+ Out = createImplicitDef(MBB, InsertPt, DL);
+ Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
+ Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
+ eraseInstrWithNoUses(MI);
+ }
+ return Out;
+}
+
+bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
+ // We look for instructions that write S registers that are then read as
+ // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
+ // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
+ // merge two SPR values to form a DPR register. In order avoid false
+ // positives we make sure that there is an SPR producer so we look past
+ // COPY and PHI nodes to find it.
+ //
+ // The best code pattern for when an SPR producer is going to be used by a
+ // DPR or QPR consumer depends on whether the other lanes of the
+ // corresponding DPR/QPR are currently defined.
+ //
+ // We can handle these efficiently, depending on the type of
+ // pseudo-instruction that is producing the pattern
+ //
+ // * COPY: * VDUP all lanes and merge the results together
+ // using VEXTs.
+ //
+ // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
+ // lane, and the other lane(s) of the DPR/QPR register
+ // that we are inserting in are undefined, use the
+ // original DPR/QPR value.
+ // * Otherwise, fall back on the same stategy as COPY.
+ //
+ // * REG_SEQUENCE: * If all except one of the input operands are
+ // IMPLICIT_DEFs, insert the VDUP pattern for just the
+ // defined input operand
+ // * Otherwise, fall back on the same stategy as COPY.
+ //
+
+ // First, get all the reads of D-registers done by this instruction.
+ SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
+ bool Modified = false;
+
+ for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ // Follow the def-use chain for this DPR through COPYs, and also through
+ // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
+ // we can end up with multiple defs of this DPR.
+
+ SmallVector<MachineInstr *, 8> DefSrcs;
+ if (!TRI->isVirtualRegister(*I))
+ continue;
+ MachineInstr *Def = MRI->getVRegDef(*I);
+ if (!Def)
+ continue;
+
+ elideCopiesAndPHIs(Def, DefSrcs);
+
+ for (SmallVectorImpl<MachineInstr *>::iterator II = DefSrcs.begin(),
+ EE = DefSrcs.end(); II != EE; ++II) {
+ MachineInstr *MI = *II;
+
+ // If we've already analyzed and replaced this operand, don't do
+ // anything.
+ if (Replacements.find(MI) != Replacements.end())
+ continue;
+
+ // Now, work out if the instruction causes a SPR->DPR dependency.
+ if (!hasPartialWrite(MI))
+ continue;
+
+ // Collect all the uses of this MI's DPR def for updating later.
+ SmallVector<MachineOperand*, 8> Uses;
+ unsigned DPRDefReg = MI->getOperand(0).getReg();
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
+ E = MRI->use_end(); I != E; ++I)
+ Uses.push_back(&*I);
+
+ // We can optimize this.
+ unsigned NewReg = optimizeSDPattern(MI);
+
+ if (NewReg != 0) {
+ Modified = true;
+ for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ // Make sure to constrain the register class of the new register to
+ // match what we're replacing. Otherwise we can optimize a DPR_VFP2
+ // reference into a plain DPR, and that will end poorly. NewReg is
+ // always virtual here, so there will always be a matching subclass
+ // to find.
+ MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg()));
+
+ DEBUG(dbgs() << "Replacing operand "
+ << **I << " with "
+ << PrintReg(NewReg) << "\n");
+ (*I)->substVirtReg(NewReg, 0, *TRI);
+ }
+ }
+ Replacements[MI] = NewReg;
+ }
+ }
+ return Modified;
+}
+
+bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+ const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
+ // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
+ // enabled when NEON is available.
+ if (!(STI.isCortexA15() && STI.hasNEON()))
+ return false;
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ bool Modified = false;
+
+ DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n");
+
+ DeadInstr.clear();
+ Replacements.clear();
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+
+ for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end();
+ MI != ME;) {
+ Modified |= runOnInstruction(MI++);
+ }
+
+ }
+
+ for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(),
+ E = DeadInstr.end();
+ I != E; ++I) {
+ (*I)->eraseFromParent();
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createA15SDOptimizerPass() {
+ return new A15SDOptimizer();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
new file mode 100644
index 0000000..cd7540e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -0,0 +1,50 @@
+//===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARM_H
+#define LLVM_LIB_TARGET_ARM_ARM_H
+
+#include "llvm/Support/CodeGen.h"
+#include <functional>
+
+namespace llvm {
+
+class ARMAsmPrinter;
+class ARMBaseTargetMachine;
+class Function;
+class FunctionPass;
+class ImmutablePass;
+class MachineInstr;
+class MCInst;
+class TargetLowering;
+class TargetMachine;
+
+FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+FunctionPass *createA15SDOptimizerPass();
+FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
+FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMConstantIslandPass();
+FunctionPass *createMLxExpansionPass();
+FunctionPass *createThumb2ITBlockPass();
+FunctionPass *createARMOptimizeBarriersPass();
+FunctionPass *createThumb2SizeReductionPass(
+ std::function<bool(const Function &)> Ftor = nullptr);
+
+void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
new file mode 100644
index 0000000..c171656
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -0,0 +1,699 @@
+//===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// ARM Helper classes.
+//
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+class Architecture<string fname, string aname, list<SubtargetFeature> features >
+ : SubtargetFeature<fname, "ARMArch", aname,
+ !strconcat(aname, " architecture"), features>;
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget state.
+//
+
+def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
+ "Thumb mode">;
+
+def ModeSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
+ "Use software floating point features.">;
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget features.
+//
+
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
+ "Enable VFP2 instructions">;
+def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
+ "Enable VFP3 instructions",
+ [FeatureVFP2]>;
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable NEON instructions",
+ [FeatureVFP3]>;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
+ "Enable Thumb2 instructions">;
+def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
+ "Does not support ARM mode execution",
+ [ModeThumb]>;
+def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
+ "Enable half-precision floating point">;
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+ "Enable VFP4 instructions",
+ [FeatureVFP3, FeatureFP16]>;
+def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
+ "true", "Enable ARMv8 FP",
+ [FeatureVFP4]>;
+def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
+ "Enable full half-precision floating point",
+ [FeatureFPARMv8]>;
+def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
+ "Restrict FP to 16 double registers">;
+def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
+ "Enable divide instructions">;
+def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
+ "HasHardwareDivideInARM", "true",
+ "Enable divide instructions in ARM mode">;
+def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+ "Enable Thumb2 extract and pack instructions">;
+def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
+ "Has data barrier (dmb / dsb) instructions">;
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+ "FP compare + branch is slow">;
+def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
+ "Floating point unit supports single precision only">;
+def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
+ "Enable support for Performance Monitor extensions">;
+def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
+ "Enable support for TrustZone security extensions">;
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+ "Enable support for Cryptography extensions",
+ [FeatureNEON]>;
+def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
+ "Enable support for CRC instructions">;
+
+// Cyclone has preferred instructions for zeroing VFP registers, which can
+// execute in 0 cycles.
+def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
+ "Has zero-cycle zeroing instructions">;
+
+// Some processors have FP multiply-accumulate instructions that don't
+// play nicely with other VFP / NEON instructions, and it's generally better
+// to just not use them.
+def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
+ "Disable VFP / NEON MAC instructions">;
+
+// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
+def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
+ "HasVMLxForwarding", "true",
+ "Has multiplier accumulator forwarding">;
+
+// Some processors benefit from using NEON instructions for scalar
+// single-precision FP operations.
+def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
+ "true",
+ "Use NEON for single precision FP">;
+
+// Disable 32-bit to 16-bit narrowing for experimentation.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+ "Prefer 32-bit Thumb instrs">;
+
+/// Some instructions update CPSR partially, which can add false dependency for
+/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
+/// mapped to a separate physical register. Avoid partial CPSR update for these
+/// processors.
+def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
+ "AvoidCPSRPartialUpdate", "true",
+ "Avoid CPSR partial update for OOO execution">;
+
+def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
+ "AvoidMOVsShifterOperand", "true",
+ "Avoid movs instructions with shifter operand">;
+
+// Some processors perform return stack prediction. CodeGen should avoid issue
+// "normal" call instructions to callees which do not return.
+def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
+ "Has return address stack">;
+
+/// DSP extension.
+def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
+ "Supports DSP instructions in ARM and/or Thumb2">;
+
+// Multiprocessing extension.
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+ "Supports Multiprocessing extension">;
+
+// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8).
+def FeatureVirtualization : SubtargetFeature<"virtualization",
+ "HasVirtualization", "true",
+ "Supports Virtualization extension",
+ [FeatureHWDiv, FeatureHWDivARM]>;
+
+// M-series ISA
+def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
+ "Is microcontroller profile ('M' series)">;
+
+// R-series ISA
+def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass",
+ "Is realtime profile ('R' series)">;
+
+// A-series ISA
+def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
+ "Is application profile ('A' series)">;
+
+// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
+// See ARMInstrInfo.td for details.
+def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
+ "NaCl trap">;
+
+def FeatureStrictAlign : SubtargetFeature<"strict-align",
+ "StrictAlign", "true",
+ "Disallow all unaligned memory "
+ "access">;
+
+def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
+ "Generate calls via indirect call "
+ "instructions">;
+
+def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
+ "Reserve R9, making it unavailable as "
+ "GPR">;
+
+def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
+ "Don't use movt/movw pairs for 32-bit "
+ "imms">;
+
+
+//===----------------------------------------------------------------------===//
+// ARM ISAa.
+//
+
+def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
+ "Support ARM v4T instructions">;
+def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
+ "Support ARM v5T instructions",
+ [HasV4TOps]>;
+def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true",
+ "Support ARM v5TE, v5TEj, and v5TExp instructions",
+ [HasV5TOps]>;
+def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
+ "Support ARM v6 instructions",
+ [HasV5TEOps]>;
+def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true",
+ "Support ARM v6M instructions",
+ [HasV6Ops]>;
+def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true",
+ "Support ARM v6k instructions",
+ [HasV6Ops]>;
+def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
+ "Support ARM v6t2 instructions",
+ [HasV6MOps, HasV6KOps, FeatureThumb2]>;
+def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
+ "Support ARM v7 instructions",
+ [HasV6T2Ops, FeaturePerfMon]>;
+def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
+ "Support ARM v8 instructions",
+ [HasV7Ops]>;
+def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
+ "Support ARM v8.1a instructions",
+ [HasV8Ops]>;
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+ "Support ARM v8.2a instructions",
+ [HasV8_1aOps]>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM Processor subtarget features.
+//
+
+def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
+ "Cortex-A5 ARM processors", []>;
+def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7",
+ "Cortex-A7 ARM processors", []>;
+def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+ "Cortex-A8 ARM processors", []>;
+def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+ "Cortex-A9 ARM processors", []>;
+def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12",
+ "Cortex-A12 ARM processors", []>;
+def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
+ "Cortex-A15 ARM processors", []>;
+def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17",
+ "Cortex-A17 ARM processors", []>;
+def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+ "Cortex-A35 ARM processors", []>;
+def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
+ "Cortex-A53 ARM processors", []>;
+def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
+ "Cortex-A57 ARM processors", []>;
+def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
+ "Cortex-A72 ARM processors", []>;
+
+def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
+ "Qualcomm ARM processors", []>;
+def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
+ "Swift ARM processors", []>;
+
+def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
+ "Samsung Exynos-M1 processors", []>;
+
+def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
+ "Cortex-R4 ARM processors", []>;
+def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
+ "Cortex-R5 ARM processors", []>;
+def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7",
+ "Cortex-R7 ARM processors", []>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM schedules.
+//
+
+include "ARMSchedule.td"
+
+
+//===----------------------------------------------------------------------===//
+// ARM architectures
+//
+
+def ARMv2 : Architecture<"armv2", "ARMv2", []>;
+
+def ARMv2a : Architecture<"armv2a", "ARMv2a", []>;
+
+def ARMv3 : Architecture<"armv3", "ARMv3", []>;
+
+def ARMv3m : Architecture<"armv3m", "ARMv3m", []>;
+
+def ARMv4 : Architecture<"armv4", "ARMv4", []>;
+
+def ARMv4t : Architecture<"armv4t", "ARMv4t", [HasV4TOps]>;
+
+def ARMv5t : Architecture<"armv5t", "ARMv5t", [HasV5TOps]>;
+
+def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>;
+
+def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>;
+
+def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>;
+
+def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops,
+ FeatureDSP]>;
+
+def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>;
+
+def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps,
+ FeatureTrustZone]>;
+
+def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureMClass]>;
+
+def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureMClass]>;
+
+def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops,
+ FeatureNEON,
+ FeatureDB,
+ FeatureDSP,
+ FeatureAClass]>;
+
+def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
+ FeatureDB,
+ FeatureDSP,
+ FeatureHWDiv,
+ FeatureRClass]>;
+
+def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
+ FeatureThumb2,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureHWDiv,
+ FeatureMClass]>;
+
+def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops,
+ FeatureThumb2,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureHWDiv,
+ FeatureMClass,
+ FeatureDSP,
+ FeatureT2XtPk]>;
+
+def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def ARMv81a : Architecture<"armv8.1-a", "ARMv81a", [HasV8_1aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+// Aliases
+def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>;
+def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>;
+def XScale : Architecture<"xscale", "ARMv5te", [ARMv5te]>;
+def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>;
+def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>;
+def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM processors
+//
+
+// Dummy CPU, used to target architectures
+def : ProcNoItin<"generic", []>;
+
+def : ProcNoItin<"arm8", [ARMv4]>;
+def : ProcNoItin<"arm810", [ARMv4]>;
+def : ProcNoItin<"strongarm", [ARMv4]>;
+def : ProcNoItin<"strongarm110", [ARMv4]>;
+def : ProcNoItin<"strongarm1100", [ARMv4]>;
+def : ProcNoItin<"strongarm1110", [ARMv4]>;
+
+def : ProcNoItin<"arm7tdmi", [ARMv4t]>;
+def : ProcNoItin<"arm7tdmi-s", [ARMv4t]>;
+def : ProcNoItin<"arm710t", [ARMv4t]>;
+def : ProcNoItin<"arm720t", [ARMv4t]>;
+def : ProcNoItin<"arm9", [ARMv4t]>;
+def : ProcNoItin<"arm9tdmi", [ARMv4t]>;
+def : ProcNoItin<"arm920", [ARMv4t]>;
+def : ProcNoItin<"arm920t", [ARMv4t]>;
+def : ProcNoItin<"arm922t", [ARMv4t]>;
+def : ProcNoItin<"arm940t", [ARMv4t]>;
+def : ProcNoItin<"ep9312", [ARMv4t]>;
+
+def : ProcNoItin<"arm10tdmi", [ARMv5t]>;
+def : ProcNoItin<"arm1020t", [ARMv5t]>;
+
+def : ProcNoItin<"arm9e", [ARMv5te]>;
+def : ProcNoItin<"arm926ej-s", [ARMv5te]>;
+def : ProcNoItin<"arm946e-s", [ARMv5te]>;
+def : ProcNoItin<"arm966e-s", [ARMv5te]>;
+def : ProcNoItin<"arm968e-s", [ARMv5te]>;
+def : ProcNoItin<"arm10e", [ARMv5te]>;
+def : ProcNoItin<"arm1020e", [ARMv5te]>;
+def : ProcNoItin<"arm1022e", [ARMv5te]>;
+def : ProcNoItin<"xscale", [ARMv5te]>;
+def : ProcNoItin<"iwmmxt", [ARMv5te]>;
+
+def : Processor<"arm1136j-s", ARMV6Itineraries, [ARMv6]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [ARMv6,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+def : Processor<"cortex-m0", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>;
+
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [ARMv6k]>;
+def : Processor<"mpcore", ARMV6Itineraries, [ARMv6k,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [ARMv6t2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+// FIXME: A5 has currently the same Schedule model as A8
+def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureMP,
+ FeatureVFP4]>;
+
+def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureMP,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureVirtualization]>;
+
+def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk]>;
+
+def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureFP16,
+ FeatureAvoidPartialCPSR,
+ FeatureMP]>;
+
+// FIXME: A12 has currently the same Schedule model as A9
+def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization,
+ FeatureMP]>;
+
+// FIXME: A15 has currently the same Schedule model as A9.
+def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureMP,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization]>;
+
+// FIXME: A17 has currently the same Schedule model as A9
+def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization]>;
+
+// FIXME: krait has currently the same Schedule model as A9
+// FIXME: krait has currently the same features as A9 plus VFP4 and hardware
+// division features.
+def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
+ FeatureHasRAS,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureFP16,
+ FeatureAvoidPartialCPSR,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM]>;
+
+def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
+ FeatureHasRAS,
+ FeatureNEONForFP,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureMP,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
+ FeatureHasSlowFPVMLx]>;
+
+// FIXME: R4 has currently the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
+ FeatureHasRAS,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
+
+// FIXME: R4F has currently the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
+ FeatureHasRAS,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVFP3,
+ FeatureD16,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
+
+// FIXME: R5 has currently the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
+ FeatureHasRAS,
+ FeatureVFP3,
+ FeatureD16,
+ FeatureSlowFPBrcc,
+ FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
+
+// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
+def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
+ FeatureHasRAS,
+ FeatureVFP3,
+ FeatureVFPOnlySP,
+ FeatureD16,
+ FeatureFP16,
+ FeatureMP,
+ FeatureSlowFPBrcc,
+ FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
+
+def : ProcNoItin<"cortex-m3", [ARMv7m]>;
+def : ProcNoItin<"sc300", [ARMv7m]>;
+
+def : ProcNoItin<"cortex-m4", [ARMv7em,
+ FeatureVFP4,
+ FeatureVFPOnlySP,
+ FeatureD16]>;
+
+def : ProcNoItin<"cortex-m7", [ARMv7em,
+ FeatureFPARMv8,
+ FeatureD16]>;
+
+
+def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+// Cyclone is very similar to swift
+def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
+ FeatureHasRAS,
+ FeatureNEONForFP,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureMP,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
+ FeatureHasSlowFPVMLx,
+ FeatureCrypto,
+ FeatureZCZeroing]>;
+
+def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "ARMRegisterInfo.td"
+
+include "ARMCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrInfo.td"
+
+def ARMInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def ARMAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
+def ARMAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+ string Name = "ARM";
+ string BreakCharacters = ".";
+}
+
+def ARM : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = ARMInstrInfo;
+ let AssemblyWriters = [ARMAsmWriter];
+ let AssemblyParserVariants = [ARMAsmParserVariant];
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
new file mode 100644
index 0000000..206db96
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -0,0 +1,1906 @@
+//===-- ARMAsmPrinter.cpp - Print machine code to an ARM .s file ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format ARM assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMAsmPrinter.h"
+#include "ARM.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "InstPrinter/ARMInstPrinter.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cctype>
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), AFI(nullptr), MCP(nullptr),
+ InConstantPool(false), OptimizationGoals(-1) {}
+
+void ARMAsmPrinter::EmitFunctionBodyEnd() {
+ // Make sure to terminate any constant pools that were at the end
+ // of the function.
+ if (!InConstantPool)
+ return;
+ InConstantPool = false;
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+}
+
+void ARMAsmPrinter::EmitFunctionEntryLabel() {
+ if (AFI->isThumbFunction()) {
+ OutStreamer->EmitAssemblerFlag(MCAF_Code16);
+ OutStreamer->EmitThumbFunc(CurrentFnSym);
+ }
+
+ OutStreamer->EmitLabel(CurrentFnSym);
+}
+
+void ARMAsmPrinter::EmitXXStructor(const DataLayout &DL, const Constant *CV) {
+ uint64_t Size = getDataLayout().getTypeAllocSize(CV->getType());
+ assert(Size && "C++ constructor pointer had zero size!");
+
+ const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
+ assert(GV && "C++ constructor pointer was not a GlobalValue!");
+
+ const MCExpr *E = MCSymbolRefExpr::create(GetARMGVSymbol(GV,
+ ARMII::MO_NO_FLAG),
+ (Subtarget->isTargetELF()
+ ? MCSymbolRefExpr::VK_ARM_TARGET1
+ : MCSymbolRefExpr::VK_None),
+ OutContext);
+
+ OutStreamer->EmitValue(E, Size);
+}
+
+/// runOnMachineFunction - This uses the EmitInstruction()
+/// method to print assembly for each instruction.
+///
+bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ MCP = MF.getConstantPool();
+ Subtarget = &MF.getSubtarget<ARMSubtarget>();
+
+ SetupMachineFunction(MF);
+ const Function* F = MF.getFunction();
+ const TargetMachine& TM = MF.getTarget();
+
+ // Calculate this function's optimization goal.
+ unsigned OptimizationGoal;
+ if (F->hasFnAttribute(Attribute::OptimizeNone))
+ // For best debugging illusion, speed and small size sacrificed
+ OptimizationGoal = 6;
+ else if (F->optForMinSize())
+ // Aggressively for small size, speed and debug illusion sacrificed
+ OptimizationGoal = 4;
+ else if (F->optForSize())
+ // For small size, but speed and debugging illusion preserved
+ OptimizationGoal = 3;
+ else if (TM.getOptLevel() == CodeGenOpt::Aggressive)
+ // Aggressively for speed, small size and debug illusion sacrificed
+ OptimizationGoal = 2;
+ else if (TM.getOptLevel() > CodeGenOpt::None)
+ // For speed, but small size and good debug illusion preserved
+ OptimizationGoal = 1;
+ else // TM.getOptLevel() == CodeGenOpt::None
+ // For good debugging, but speed and small size preserved
+ OptimizationGoal = 5;
+
+ // Combine a new optimization goal with existing ones.
+ if (OptimizationGoals == -1) // uninitialized goals
+ OptimizationGoals = OptimizationGoal;
+ else if (OptimizationGoals != (int)OptimizationGoal) // conflicting goals
+ OptimizationGoals = 0;
+
+ if (Subtarget->isTargetCOFF()) {
+ bool Internal = F->hasInternalLinkage();
+ COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL;
+ int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
+
+ OutStreamer->BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer->EmitCOFFSymbolStorageClass(Scl);
+ OutStreamer->EmitCOFFSymbolType(Type);
+ OutStreamer->EndCOFFSymbolDef();
+ }
+
+ // Emit the rest of the function body.
+ EmitFunctionBody();
+
+ // If we need V4T thumb mode Register Indirect Jump pads, emit them.
+ // These are created per function, rather than per TU, since it's
+ // relatively easy to exceed the thumb branch range within a TU.
+ if (! ThumbIndirectPads.empty()) {
+ OutStreamer->EmitAssemblerFlag(MCAF_Code16);
+ EmitAlignment(1);
+ for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) {
+ OutStreamer->EmitLabel(ThumbIndirectPads[i].second);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBX)
+ .addReg(ThumbIndirectPads[i].first)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ }
+ ThumbIndirectPads.clear();
+ }
+
+ // We didn't modify anything.
+ return false;
+}
+
+void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned TF = MO.getTargetFlags();
+
+ switch (MO.getType()) {
+ default: llvm_unreachable("<unknown operand type>");
+ case MachineOperand::MO_Register: {
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ if(ARM::GPRPairRegClass.contains(Reg)) {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ Reg = TRI->getSubReg(Reg, ARM::gsub_0);
+ }
+ O << ARMInstPrinter::getRegisterName(Reg);
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ int64_t Imm = MO.getImm();
+ O << '#';
+ if (TF == ARMII::MO_LO16)
+ O << ":lower16:";
+ else if (TF == ARMII::MO_HI16)
+ O << ":upper16:";
+ O << Imm;
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ MO.getMBB()->getSymbol()->print(O, MAI);
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ if (TF & ARMII::MO_LO16)
+ O << ":lower16:";
+ else if (TF & ARMII::MO_HI16)
+ O << ":upper16:";
+ GetARMGVSymbol(GV, TF)->print(O, MAI);
+
+ printOffset(MO.getOffset(), O);
+ if (TF == ARMII::MO_PLT)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ConstantPoolIndex:
+ GetCPISymbol(MO.getIndex())->print(O, MAI);
+ break;
+ }
+}
+
+//===--------------------------------------------------------------------===//
+
+MCSymbol *ARMAsmPrinter::
+GetARMJTIPICJumpTableLabel(unsigned uid) const {
+ const DataLayout &DL = getDataLayout();
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << uid;
+ return OutContext.getOrCreateSymbol(Name);
+}
+
+bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+ case 'a': // Print as a memory address.
+ if (MI->getOperand(OpNum).isReg()) {
+ O << "["
+ << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg())
+ << "]";
+ return false;
+ }
+ // Fallthrough
+ case 'c': // Don't print "#" before an immediate operand.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << MI->getOperand(OpNum).getImm();
+ return false;
+ case 'P': // Print a VFP double precision register.
+ case 'q': // Print a NEON quad precision register.
+ printOperand(MI, OpNum, O);
+ return false;
+ case 'y': // Print a VFP single precision register as indexed double.
+ if (MI->getOperand(OpNum).isReg()) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ // Find the 'd' register that has this 's' register as a sub-register,
+ // and determine the lane number.
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) {
+ if (!ARM::DPRRegClass.contains(*SR))
+ continue;
+ bool Lane0 = TRI->getSubReg(*SR, ARM::ssub_0) == Reg;
+ O << ARMInstPrinter::getRegisterName(*SR) << (Lane0 ? "[0]" : "[1]");
+ return false;
+ }
+ }
+ return true;
+ case 'B': // Bitwise inverse of integer or symbol without a preceding #.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << ~(MI->getOperand(OpNum).getImm());
+ return false;
+ case 'L': // The low 16 bits of an immediate constant.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << (MI->getOperand(OpNum).getImm() & 0xffff);
+ return false;
+ case 'M': { // A register range suitable for LDM/STM.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned RegBegin = MO.getReg();
+ // This takes advantage of the 2 operand-ness of ldm/stm and that we've
+ // already got the operands in registers that are operands to the
+ // inline asm statement.
+ O << "{";
+ if (ARM::GPRPairRegClass.contains(RegBegin)) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0);
+ O << ARMInstPrinter::getRegisterName(Reg0) << ", ";
+ RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1);
+ }
+ O << ARMInstPrinter::getRegisterName(RegBegin);
+
+ // FIXME: The register allocator not only may not have given us the
+ // registers in sequence, but may not be in ascending registers. This
+ // will require changes in the register allocator that'll need to be
+ // propagated down here if the operands change.
+ unsigned RegOps = OpNum + 1;
+ while (MI->getOperand(RegOps).isReg()) {
+ O << ", "
+ << ARMInstPrinter::getRegisterName(MI->getOperand(RegOps).getReg());
+ RegOps++;
+ }
+
+ O << "}";
+
+ return false;
+ }
+ case 'R': // The most significant register of a pair.
+ case 'Q': { // The least significant register of a pair.
+ if (OpNum == 0)
+ return true;
+ const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1);
+ if (!FlagsOP.isImm())
+ return true;
+ unsigned Flags = FlagsOP.getImm();
+
+ // This operand may not be the one that actually provides the register. If
+ // it's tied to a previous one then we should refer instead to that one
+ // for registers and their classes.
+ unsigned TiedIdx;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, TiedIdx)) {
+ for (OpNum = InlineAsm::MIOp_FirstOperand; TiedIdx; --TiedIdx) {
+ unsigned OpFlags = MI->getOperand(OpNum).getImm();
+ OpNum += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+ Flags = MI->getOperand(OpNum).getImm();
+
+ // Later code expects OpNum to be pointing at the register rather than
+ // the flags.
+ OpNum += 1;
+ }
+
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ unsigned RC;
+ InlineAsm::hasRegClassConstraint(Flags, RC);
+ if (RC == ARM::GPRPairRegClassID) {
+ if (NumVals != 1)
+ return true;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.isReg())
+ return true;
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ?
+ ARM::gsub_0 : ARM::gsub_1);
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
+ }
+ if (NumVals != 2)
+ return true;
+ unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1;
+ if (RegOp >= MI->getNumOperands())
+ return true;
+ const MachineOperand &MO = MI->getOperand(RegOp);
+ if (!MO.isReg())
+ return true;
+ unsigned Reg = MO.getReg();
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
+ }
+
+ case 'e': // The low doubleword register of a NEON quad register.
+ case 'f': { // The high doubleword register of a NEON quad register.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (!ARM::QPRRegClass.contains(Reg))
+ return true;
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
+ ARM::dsub_0 : ARM::dsub_1);
+ O << ARMInstPrinter::getRegisterName(SubReg);
+ return false;
+ }
+
+ // This modifier is not yet supported.
+ case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
+ return true;
+ case 'H': { // The highest-numbered register of a pair.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.isReg())
+ return true;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ unsigned Reg = MO.getReg();
+ if(!ARM::GPRPairRegClass.contains(Reg))
+ return false;
+ Reg = TRI->getSubReg(Reg, ARM::gsub_1);
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
+ }
+ }
+ }
+
+ printOperand(MI, OpNum, O);
+ return false;
+}
+
+bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ case 'A': // A memory operand for a VLD1/VST1 instruction.
+ default: return true; // Unknown modifier.
+ case 'm': // The base register of a memory operand.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ O << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg());
+ return false;
+ }
+ }
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]";
+ return false;
+}
+
+static bool isThumb(const MCSubtargetInfo& STI) {
+ return STI.getFeatureBits()[ARM::ModeThumb];
+}
+
+void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
+ const MCSubtargetInfo *EndInfo) const {
+ // If either end mode is unknown (EndInfo == NULL) or different than
+ // the start mode, then restore the start mode.
+ const bool WasThumb = isThumb(StartInfo);
+ if (!EndInfo || WasThumb != isThumb(*EndInfo)) {
+ OutStreamer->EmitAssemblerFlag(WasThumb ? MCAF_Code16 : MCAF_Code32);
+ }
+}
+
+void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ const Triple &TT = TM.getTargetTriple();
+ // Use unified assembler syntax.
+ OutStreamer->EmitAssemblerFlag(MCAF_SyntaxUnified);
+
+ // Emit ARM Build Attributes
+ if (TT.isOSBinFormatELF())
+ emitAttributes();
+
+ // Use the triple's architecture and subarchitecture to determine
+ // if we're thumb for the purposes of the top level code16 assembler
+ // flag.
+ bool isThumb = TT.getArch() == Triple::thumb ||
+ TT.getArch() == Triple::thumbeb ||
+ TT.getSubArch() == Triple::ARMSubArch_v7m ||
+ TT.getSubArch() == Triple::ARMSubArch_v6m;
+ if (!M.getModuleInlineAsm().empty() && isThumb)
+ OutStreamer->EmitAssemblerFlag(MCAF_Code16);
+}
+
+static void
+emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
+ MachineModuleInfoImpl::StubValueTy &MCSym) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(StubLabel);
+ // .indirect_symbol _foo
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs; however, sometimes the types are local to the file.
+ // We need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(
+ MCSymbolRefExpr::create(MCSym.getPointer(), OutStreamer.getContext()),
+ 4 /*size*/);
+}
+
+
+void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ const Triple &TT = TM.getTargetTriple();
+ if (TT.isOSBinFormatMachO()) {
+ // All darwin targets use mach-o.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ // Output non-lazy-pointers for external and common global variables.
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList();
+
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(2);
+
+ for (auto &Stub : Stubs)
+ emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
+
+ Stubs.clear();
+ OutStreamer->AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(2);
+
+ for (auto &Stub : Stubs)
+ emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
+
+ Stubs.clear();
+ OutStreamer->AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ }
+
+ // The last attribute to be emitted is ABI_optimization_goals
+ MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+
+ if (OptimizationGoals > 0 &&
+ (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI()))
+ ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals);
+ OptimizationGoals = -1;
+
+ ATS.finishAttributeSection();
+}
+
+//===----------------------------------------------------------------------===//
+// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+// FIXME:
+// The following seem like one-off assembler flags, but they actually need
+// to appear in the .ARM.attributes section in ELF.
+// Instead of subclassing the MCELFStreamer, we do the work here.
+
+static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
+ const ARMSubtarget *Subtarget) {
+ if (CPU == "xscale")
+ return ARMBuildAttrs::v5TEJ;
+
+ if (Subtarget->hasV8Ops())
+ return ARMBuildAttrs::v8;
+ else if (Subtarget->hasV7Ops()) {
+ if (Subtarget->isMClass() && Subtarget->hasDSP())
+ return ARMBuildAttrs::v7E_M;
+ return ARMBuildAttrs::v7;
+ } else if (Subtarget->hasV6T2Ops())
+ return ARMBuildAttrs::v6T2;
+ else if (Subtarget->hasV6MOps())
+ return ARMBuildAttrs::v6S_M;
+ else if (Subtarget->hasV6Ops())
+ return ARMBuildAttrs::v6;
+ else if (Subtarget->hasV5TEOps())
+ return ARMBuildAttrs::v5TE;
+ else if (Subtarget->hasV5TOps())
+ return ARMBuildAttrs::v5T;
+ else if (Subtarget->hasV4TOps())
+ return ARMBuildAttrs::v4T;
+ else
+ return ARMBuildAttrs::v4;
+}
+
+void ARMAsmPrinter::emitAttributes() {
+ MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+
+ ATS.emitTextAttribute(ARMBuildAttrs::conformance, "2.09");
+
+ ATS.switchVendor("aeabi");
+
+ // Compute ARM ELF Attributes based on the default subtarget that
+ // we'd have constructed. The existing ARM behavior isn't LTO clean
+ // anyhow.
+ // FIXME: For ifunc related functions we could iterate over and look
+ // for a feature string that doesn't match the default one.
+ const Triple &TT = TM.getTargetTriple();
+ StringRef CPU = TM.getTargetCPU();
+ StringRef FS = TM.getTargetFeatureString();
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
+ else
+ ArchFS = FS;
+ }
+ const ARMBaseTargetMachine &ATM =
+ static_cast<const ARMBaseTargetMachine &>(TM);
+ const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian());
+
+ std::string CPUString = STI.getCPUString();
+
+ if (CPUString.find("generic") != 0) { //CPUString doesn't start with "generic"
+ // FIXME: remove krait check when GNU tools support krait cpu
+ if (STI.isKrait()) {
+ ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
+ // We consider krait as a "cortex-a9" + hwdiv CPU
+ // Enable hwdiv through ".arch_extension idiv"
+ if (STI.hasDivide() || STI.hasDivideInARMMode())
+ ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM);
+ } else
+ ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+ }
+
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI));
+
+ // Tag_CPU_arch_profile must have the default value of 0 when "Architecture
+ // profile is not applicable (e.g. pre v7, or cross-profile code)".
+ if (STI.hasV7Ops()) {
+ if (STI.isAClass()) {
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ } else if (STI.isRClass()) {
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::RealTimeProfile);
+ } else if (STI.isMClass()) {
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::MicroControllerProfile);
+ }
+ }
+
+ ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ STI.hasARMOps() ? ARMBuildAttrs::Allowed
+ : ARMBuildAttrs::Not_Allowed);
+ if (STI.isThumb1Only()) {
+ ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
+ } else if (STI.hasThumb2()) {
+ ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ }
+
+ if (STI.hasNEON()) {
+ /* NEON is not exactly a VFP architecture, but GAS emit one of
+ * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (STI.hasFPARMv8()) {
+ if (STI.hasCrypto())
+ ATS.emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
+ else
+ ATS.emitFPU(ARM::FK_NEON_FP_ARMV8);
+ } else if (STI.hasVFP4())
+ ATS.emitFPU(ARM::FK_NEON_VFPV4);
+ else
+ ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON);
+ // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
+ if (STI.hasV8Ops())
+ ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a:
+ ARMBuildAttrs::AllowNeonARMv8);
+ } else {
+ if (STI.hasFPARMv8())
+ // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
+ // FPU, but there are two different names for it depending on the CPU.
+ ATS.emitFPU(STI.hasD16()
+ ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16)
+ : ARM::FK_FP_ARMV8);
+ else if (STI.hasVFP4())
+ ATS.emitFPU(STI.hasD16()
+ ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16)
+ : ARM::FK_VFPV4);
+ else if (STI.hasVFP3())
+ ATS.emitFPU(STI.hasD16()
+ // +d16
+ ? (STI.isFPOnlySP()
+ ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD)
+ : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16))
+ // -d16
+ : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3));
+ else if (STI.hasVFP2())
+ ATS.emitFPU(ARM::FK_VFPV2);
+ }
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // PIC specific attributes.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data,
+ ARMBuildAttrs::AddressRWPCRel);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RO_data,
+ ARMBuildAttrs::AddressROPCRel);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use,
+ ARMBuildAttrs::AddressGOT);
+ } else {
+ // Allow direct addressing of imported data for all other relocation models.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use,
+ ARMBuildAttrs::AddressDirect);
+ }
+
+ // Signal various FP modes.
+ if (!TM.Options.UnsafeFPMath) {
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::IEEEDenormals);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed);
+
+ // If the user has permitted this code to choose the IEEE 754
+ // rounding at run-time, emit the rounding attribute.
+ if (TM.Options.HonorSignDependentRoundingFPMathOption)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding, ARMBuildAttrs::Allowed);
+ } else {
+ if (!STI.hasVFP2()) {
+ // When the target doesn't have an FPU (by design or
+ // intention), the assumptions made on the software support
+ // mirror that of the equivalent hardware support *if it
+ // existed*. For v7 and better we indicate that denormals are
+ // flushed preserving sign, and for V6 we indicate that
+ // denormals are flushed to positive zero.
+ if (STI.hasV7Ops())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::PreserveFPSign);
+ } else if (STI.hasVFP3()) {
+ // In VFPv4, VFPv4U, VFPv3, or VFPv3U, it is preserved. That is,
+ // the sign bit of the zero matches the sign bit of the input or
+ // result that is being flushed to zero.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::PreserveFPSign);
+ }
+ // For VFPv2 implementations it is implementation defined as
+ // to whether denormals are flushed to positive zero or to
+ // whatever the sign of zero is (ARM v7AR ARM 2.7.5). Historically
+ // LLVM has chosen to flush this to positive zero (most likely for
+ // GCC compatibility), so that's the chosen value here (the
+ // absence of its emission implies zero).
+ }
+
+ // TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath is the
+ // equivalent of GCC's -ffinite-math-only flag.
+ if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::Allowed);
+ else
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::AllowIEE754);
+
+ if (STI.allowsUnalignedMem())
+ ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Allowed);
+ else
+ ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Not_Allowed);
+
+ // FIXME: add more flags to ARMBuildAttributes.h
+ // 8-bytes alignment stuff.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1);
+
+ // ABI_HardFP_use attribute to indicate single precision FP.
+ if (STI.isFPOnlySP())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
+ ARMBuildAttrs::HardFPSinglePrecision);
+
+ // Hard float. Use both S and D registers and conform to AAPCS-VFP.
+ if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS);
+
+ // FIXME: Should we signal R9 usage?
+
+ if (STI.hasFP16())
+ ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+
+ // FIXME: To support emitting this build attribute as GCC does, the
+ // -mfp16-format option and associated plumbing must be
+ // supported. For now the __fp16 type is exposed by default, so this
+ // attribute should be emitted with value 1.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format,
+ ARMBuildAttrs::FP16FormatIEEE);
+
+ if (STI.hasMPExtension())
+ ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
+
+ // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
+ // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
+ // It is not possible to produce DisallowDIV: if hwdiv is present in the base
+ // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
+ // AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
+ // otherwise, the default value (AllowDIVIfExists) applies.
+ if (STI.hasDivideInARMMode() && !STI.hasV8Ops())
+ ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
+
+ if (MMI) {
+ if (const Module *SourceModule = MMI->getModule()) {
+ // ABI_PCS_wchar_t to indicate wchar_t width
+ // FIXME: There is no way to emit value 0 (wchar_t prohibited).
+ if (auto WCharWidthValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("wchar_size"))) {
+ int WCharWidth = WCharWidthValue->getZExtValue();
+ assert((WCharWidth == 2 || WCharWidth == 4) &&
+ "wchar_t width must be 2 or 4 bytes");
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth);
+ }
+
+ // ABI_enum_size to indicate enum width
+ // FIXME: There is no way to emit value 0 (enums prohibited) or value 3
+ // (all enums contain a value needing 32 bits to encode).
+ if (auto EnumWidthValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("min_enum_size"))) {
+ int EnumWidth = EnumWidthValue->getZExtValue();
+ assert((EnumWidth == 1 || EnumWidth == 4) &&
+ "Minimum enum width must be 1 or 4 bytes");
+ int EnumBuildAttr = EnumWidth == 1 ? 1 : 2;
+ ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr);
+ }
+ }
+ }
+
+ // TODO: We currently only support either reserving the register, or treating
+ // it as another callee-saved register, but not as SB or a TLS pointer; It
+ // would instead be nicer to push this from the frontend as metadata, as we do
+ // for the wchar and enum size tags
+ if (STI.isR9Reserved())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9Reserved);
+ else
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsGPR);
+
+ if (STI.hasTrustZone() && STI.hasVirtualization())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZVirtualization);
+ else if (STI.hasTrustZone())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZ);
+ else if (STI.hasVirtualization())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowVirtualization);
+}
+
+//===----------------------------------------------------------------------===//
+
+static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
+ unsigned LabelId, MCContext &Ctx) {
+
+ MCSymbol *Label = Ctx.getOrCreateSymbol(Twine(Prefix)
+ + "PC" + Twine(FunctionNumber) + "_" + Twine(LabelId));
+ return Label;
+}
+
+static MCSymbolRefExpr::VariantKind
+getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
+ switch (Modifier) {
+ case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
+ case ARMCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD;
+ case ARMCP::TPOFF: return MCSymbolRefExpr::VK_TPOFF;
+ case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_GOTTPOFF;
+ case ARMCP::GOT_PREL: return MCSymbolRefExpr::VK_ARM_GOT_PREL;
+ }
+ llvm_unreachable("Invalid ARMCPModifier!");
+}
+
+MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
+ unsigned char TargetFlags) {
+ if (Subtarget->isTargetMachO()) {
+ bool IsIndirect = (TargetFlags & ARMII::MO_NONLAZY) &&
+ Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+
+ if (!IsIndirect)
+ return getSymbol(GV);
+
+ // FIXME: Remove this when Darwin transition to @GOT like syntax.
+ MCSymbol *MCSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoMachO &MMIMachO =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym)
+ : MMIMachO.getGVStubEntry(MCSym);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
+ !GV->hasInternalLinkage());
+ return MCSym;
+ } else if (Subtarget->isTargetCOFF()) {
+ assert(Subtarget->isTargetWindows() &&
+ "Windows is the only supported COFF target");
+
+ bool IsIndirect = (TargetFlags & ARMII::MO_DLLIMPORT);
+ if (!IsIndirect)
+ return getSymbol(GV);
+
+ SmallString<128> Name;
+ Name = "__imp_";
+ getNameWithPrefix(Name, GV);
+
+ return OutContext.getOrCreateSymbol(Name);
+ } else if (Subtarget->isTargetELF()) {
+ return getSymbol(GV);
+ }
+ llvm_unreachable("unexpected target");
+}
+
+void ARMAsmPrinter::
+EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ const DataLayout &DL = getDataLayout();
+ int Size = DL.getTypeAllocSize(MCPV->getType());
+
+ ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+
+ MCSymbol *MCSym;
+ if (ACPV->isLSDA()) {
+ MCSym = getCurExceptionSym();
+ } else if (ACPV->isBlockAddress()) {
+ const BlockAddress *BA =
+ cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress();
+ MCSym = GetBlockAddressSymbol(BA);
+ } else if (ACPV->isGlobalValue()) {
+ const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV();
+
+ // On Darwin, const-pool entries may get the "FOO$non_lazy_ptr" mangling, so
+ // flag the global as MO_NONLAZY.
+ unsigned char TF = Subtarget->isTargetMachO() ? ARMII::MO_NONLAZY : 0;
+ MCSym = GetARMGVSymbol(GV, TF);
+ } else if (ACPV->isMachineBasicBlock()) {
+ const MachineBasicBlock *MBB = cast<ARMConstantPoolMBB>(ACPV)->getMBB();
+ MCSym = MBB->getSymbol();
+ } else {
+ assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
+ const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
+ MCSym = GetExternalSymbolSymbol(Sym);
+ }
+
+ // Create an MCSymbol for the reference.
+ const MCExpr *Expr =
+ MCSymbolRefExpr::create(MCSym, getModifierVariantKind(ACPV->getModifier()),
+ OutContext);
+
+ if (ACPV->getPCAdjustment()) {
+ MCSymbol *PCLabel =
+ getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ ACPV->getLabelId(), OutContext);
+ const MCExpr *PCRelExpr = MCSymbolRefExpr::create(PCLabel, OutContext);
+ PCRelExpr =
+ MCBinaryExpr::createAdd(PCRelExpr,
+ MCConstantExpr::create(ACPV->getPCAdjustment(),
+ OutContext),
+ OutContext);
+ if (ACPV->mustAddCurrentAddress()) {
+ // We want "(<expr> - .)", but MC doesn't have a concept of the '.'
+ // label, so just emit a local label end reference that instead.
+ MCSymbol *DotSym = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(DotSym);
+ const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
+ PCRelExpr = MCBinaryExpr::createSub(PCRelExpr, DotExpr, OutContext);
+ }
+ Expr = MCBinaryExpr::createSub(Expr, PCRelExpr, OutContext);
+ }
+ OutStreamer->EmitValue(Expr, Size);
+}
+
+void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) {
+ const MachineOperand &MO1 = MI->getOperand(1);
+ unsigned JTI = MO1.getIndex();
+
+ // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for
+ // ARM mode tables.
+ EmitAlignment(2);
+
+ // Emit a label for the jump table.
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
+ OutStreamer->EmitLabel(JTISymbol);
+
+ // Mark the jump table as data-in-code.
+ OutStreamer->EmitDataRegion(MCDR_DataRegionJT32);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ // Construct an MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr)
+ //
+ // For example, a table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .word (LBB0 - LJTI_0_0)
+ // .word (LBB1 - LJTI_0_0)
+ const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ Expr = MCBinaryExpr::createSub(Expr, MCSymbolRefExpr::create(JTISymbol,
+ OutContext),
+ OutContext);
+ // If we're generating a table of Thumb addresses in static relocation
+ // model, we need to add one to keep interworking correctly.
+ else if (AFI->isThumbFunction())
+ Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(1,OutContext),
+ OutContext);
+ OutStreamer->EmitValue(Expr, 4);
+ }
+ // Mark the end of jump table data-in-code region.
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+}
+
+void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) {
+ const MachineOperand &MO1 = MI->getOperand(1);
+ unsigned JTI = MO1.getIndex();
+
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
+ OutStreamer->EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(),
+ OutContext);
+ // If this isn't a TBB or TBH, the entries are direct branch instructions.
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B)
+ .addExpr(MBBSymbolExpr)
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ }
+}
+
+void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
+ unsigned OffsetWidth) {
+ assert((OffsetWidth == 1 || OffsetWidth == 2) && "invalid tbb/tbh width");
+ const MachineOperand &MO1 = MI->getOperand(1);
+ unsigned JTI = MO1.getIndex();
+
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
+ OutStreamer->EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // Mark the jump table as data-in-code.
+ OutStreamer->EmitDataRegion(OffsetWidth == 1 ? MCDR_DataRegionJT8
+ : MCDR_DataRegionJT16);
+
+ for (auto MBB : JTBBs) {
+ const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(),
+ OutContext);
+ // Otherwise it's an offset from the dispatch instruction. Construct an
+ // MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TBBInstAddr + 4) / 2
+ //
+ // For example, a TBB table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .byte (LBB0 - (LCPI0_0 + 4)) / 2
+ // .byte (LBB1 - (LCPI0_0 + 4)) / 2
+ // where LCPI0_0 is a label defined just before the TBB instruction using
+ // this table.
+ MCSymbol *TBInstPC = GetCPISymbol(MI->getOperand(0).getImm());
+ const MCExpr *Expr = MCBinaryExpr::createAdd(
+ MCSymbolRefExpr::create(TBInstPC, OutContext),
+ MCConstantExpr::create(4, OutContext), OutContext);
+ Expr = MCBinaryExpr::createSub(MBBSymbolExpr, Expr, OutContext);
+ Expr = MCBinaryExpr::createDiv(Expr, MCConstantExpr::create(2, OutContext),
+ OutContext);
+ OutStreamer->EmitValue(Expr, OffsetWidth);
+ }
+ // Mark the end of jump table data-in-code region. 32-bit offsets use
+ // actual branch instructions here, so we don't mark those as a data-region
+ // at all.
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+
+ // Make sure the next instruction is 2-byte aligned.
+ EmitAlignment(1);
+}
+
+void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
+ assert(MI->getFlag(MachineInstr::FrameSetup) &&
+ "Only instruction which are involved into frame setup code are allowed");
+
+ MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
+
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned Opc = MI->getOpcode();
+ unsigned SrcReg, DstReg;
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) {
+ // Two special cases:
+ // 1) tPUSH does not have src/dst regs.
+ // 2) for Thumb1 code we sometimes materialize the constant via constpool
+ // load. Yes, this is pretty fragile, but for now I don't see better
+ // way... :(
+ SrcReg = DstReg = ARM::SP;
+ } else {
+ SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI->getOperand(0).getReg();
+ }
+
+ // Try to figure out the unwinding opcode out of src / dst regs.
+ if (MI->mayStore()) {
+ // Register saves.
+ assert(DstReg == ARM::SP &&
+ "Only stack pointer as a destination reg is supported");
+
+ SmallVector<unsigned, 4> RegList;
+ // Skip src & dst reg, and pred ops.
+ unsigned StartOp = 2 + 2;
+ // Use all the operands.
+ unsigned NumOffset = 0;
+
+ switch (Opc) {
+ default:
+ MI->dump();
+ llvm_unreachable("Unsupported opcode for unwinding information");
+ case ARM::tPUSH:
+ // Special case here: no src & dst reg, but two extra imp ops.
+ StartOp = 2; NumOffset = 2;
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ assert(SrcReg == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
+ i != NumOps; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Actually, there should never be any impdef stuff here. Skip it
+ // temporary to workaround PR11902.
+ if (MO.isImplicit())
+ continue;
+ RegList.push_back(MO.getReg());
+ }
+ break;
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
+ case ARM::t2STR_PRE:
+ assert(MI->getOperand(2).getReg() == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ RegList.push_back(SrcReg);
+ break;
+ }
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
+ ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+ } else {
+ // Changes of stack / frame pointer.
+ if (SrcReg == ARM::SP) {
+ int64_t Offset = 0;
+ switch (Opc) {
+ default:
+ MI->dump();
+ llvm_unreachable("Unsupported opcode for unwinding information");
+ case ARM::MOVr:
+ case ARM::tMOVr:
+ Offset = 0;
+ break;
+ case ARM::ADDri:
+ case ARM::t2ADDri:
+ Offset = -MI->getOperand(2).getImm();
+ break;
+ case ARM::SUBri:
+ case ARM::t2SUBri:
+ Offset = MI->getOperand(2).getImm();
+ break;
+ case ARM::tSUBspi:
+ Offset = MI->getOperand(2).getImm()*4;
+ break;
+ case ARM::tADDspi:
+ case ARM::tADDrSPi:
+ Offset = -MI->getOperand(2).getImm()*4;
+ break;
+ case ARM::tLDRpci: {
+ // Grab the constpool index and check, whether it corresponds to
+ // original or cloned constpool entry.
+ unsigned CPI = MI->getOperand(1).getIndex();
+ const MachineConstantPool *MCP = MF.getConstantPool();
+ if (CPI >= MCP->getConstants().size())
+ CPI = AFI.getOriginalCPIdx(CPI);
+ assert(CPI != -1U && "Invalid constpool index");
+
+ // Derive the actual offset.
+ const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+ assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
+ // FIXME: Check for user, it should be "add" instruction!
+ Offset = -cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
+ break;
+ }
+ }
+
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) {
+ if (DstReg == FramePtr && FramePtr != ARM::SP)
+ // Set-up of the frame pointer. Positive values correspond to "add"
+ // instruction.
+ ATS.emitSetFP(FramePtr, ARM::SP, -Offset);
+ else if (DstReg == ARM::SP) {
+ // Change of SP by an offset. Positive values correspond to "sub"
+ // instruction.
+ ATS.emitPad(Offset);
+ } else {
+ // Move of SP to a register. Positive values correspond to an "add"
+ // instruction.
+ ATS.emitMovSP(DstReg, -Offset);
+ }
+ }
+ } else if (DstReg == ARM::SP) {
+ MI->dump();
+ llvm_unreachable("Unsupported opcode for unwinding information");
+ }
+ else {
+ MI->dump();
+ llvm_unreachable("Unsupported opcode for unwinding information");
+ }
+ }
+}
+
+// Simple pseudo-instructions have their lowering (with expansion to real
+// instructions) auto-generated.
+#include "ARMGenMCPseudoLowering.inc"
+
+void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ const DataLayout &DL = getDataLayout();
+
+ // If we just ended a constant pool, mark it as such.
+ if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+ InConstantPool = false;
+ }
+
+ // Emit unwinding stuff for frame-related instructions
+ if (Subtarget->isTargetEHABICompatible() &&
+ MI->getFlag(MachineInstr::FrameSetup))
+ EmitUnwindingInstruction(MI);
+
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(*OutStreamer, MI))
+ return;
+
+ assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
+ "Pseudo flag setting opcode should be expanded early");
+
+ // Check for manual lowerings.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass");
+ case ARM::DBG_VALUE: llvm_unreachable("Should be handled by generic printing");
+ case ARM::LEApcrel:
+ case ARM::tLEApcrel:
+ case ARM::t2LEApcrel: {
+ // FIXME: Need to also handle globals and externals
+ MCSymbol *CPISymbol = GetCPISymbol(MI->getOperand(1).getIndex());
+ EmitToStreamer(*OutStreamer, MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrel ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
+ : ARM::ADR))
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(MCSymbolRefExpr::create(CPISymbol, OutContext))
+ // Add predicate operands.
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg()));
+ return;
+ }
+ case ARM::LEApcrelJT:
+ case ARM::tLEApcrelJT:
+ case ARM::t2LEApcrelJT: {
+ MCSymbol *JTIPICSymbol =
+ GetARMJTIPICJumpTableLabel(MI->getOperand(1).getIndex());
+ EmitToStreamer(*OutStreamer, MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrelJT ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
+ : ARM::ADR))
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(MCSymbolRefExpr::create(JTIPICSymbol, OutContext))
+ // Add predicate operands.
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg()));
+ return;
+ }
+ // Darwin call instructions are just normal call instructions with different
+ // clobber semantics (they clobber R9).
+ case ARM::BX_CALL: {
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX)
+ .addReg(MI->getOperand(0).getReg()));
+ return;
+ }
+ case ARM::tBX_CALL: {
+ if (Subtarget->hasV5TOps())
+ llvm_unreachable("Expected BLX to be selected for v5t+");
+
+ // On ARM v4t, when doing a call from thumb mode, we need to ensure
+ // that the saved lr has its LSB set correctly (the arch doesn't
+ // have blx).
+ // So here we generate a bl to a small jump pad that does bx rN.
+ // The jump pads are emitted after the function body.
+
+ unsigned TReg = MI->getOperand(0).getReg();
+ MCSymbol *TRegSym = nullptr;
+ for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) {
+ if (ThumbIndirectPads[i].first == TReg) {
+ TRegSym = ThumbIndirectPads[i].second;
+ break;
+ }
+ }
+
+ if (!TRegSym) {
+ TRegSym = OutContext.createTempSymbol();
+ ThumbIndirectPads.push_back(std::make_pair(TReg, TRegSym));
+ }
+
+ // Create a link-saving branch to the Reg Indirect Jump Pad.
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBL)
+ // Predicate comes first here.
+ .addImm(ARMCC::AL).addReg(0)
+ .addExpr(MCSymbolRefExpr::create(TRegSym, OutContext)));
+ return;
+ }
+ case ARM::BMOVPCRX_CALL: {
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
+ return;
+ }
+ case ARM::BMOVPCB_CALL: {
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
+
+ const MachineOperand &Op = MI->getOperand(0);
+ const GlobalValue *GV = Op.getGlobal();
+ const unsigned TF = Op.getTargetFlags();
+ MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc)
+ .addExpr(GVSymExpr)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVi16_ga_pcrel? ARM::MOVi16 : ARM::t2MOVi16);
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+
+ unsigned TF = MI->getOperand(1).getTargetFlags();
+ const GlobalValue *GV = MI->getOperand(1).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
+
+ MCSymbol *LabelSym =
+ getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext);
+ unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::createLower16(MCBinaryExpr::createSub(GVSymExpr,
+ MCBinaryExpr::createAdd(LabelSymExpr,
+ MCConstantExpr::create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(PCRelExpr));
+
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::createReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::createReg(0));
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVTi16_ga_pcrel
+ ? ARM::MOVTi16 : ARM::t2MOVTi16);
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(1).getReg()));
+
+ unsigned TF = MI->getOperand(2).getTargetFlags();
+ const GlobalValue *GV = MI->getOperand(2).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
+
+ MCSymbol *LabelSym =
+ getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ MI->getOperand(3).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext);
+ unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::createUpper16(MCBinaryExpr::createSub(GVSymExpr,
+ MCBinaryExpr::createAdd(LabelSymExpr,
+ MCConstantExpr::create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(PCRelExpr));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::createReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::createReg(0));
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case ARM::tPICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext));
+
+ // Form and emit the add.
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
+ case ARM::PICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc, r0
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext));
+
+ // Form and emit the add.
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDrr)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg())
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
+ return;
+ }
+ case ARM::PICSTR:
+ case ARM::PICSTRB:
+ case ARM::PICSTRH:
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICLDRH:
+ case ARM::PICLDRSB:
+ case ARM::PICLDRSH: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // OP r0, [pc, r0]
+ // The LCP0 label is referenced by a constant pool entry in order to get
+ // a PC-relative address at the ldr instruction.
+
+ // Emit the label.
+ OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext));
+
+ // Form and emit the load
+ unsigned Opcode;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case ARM::PICSTR: Opcode = ARM::STRrs; break;
+ case ARM::PICSTRB: Opcode = ARM::STRBrs; break;
+ case ARM::PICSTRH: Opcode = ARM::STRH; break;
+ case ARM::PICLDR: Opcode = ARM::LDRrs; break;
+ case ARM::PICLDRB: Opcode = ARM::LDRBrs; break;
+ case ARM::PICLDRH: Opcode = ARM::LDRH; break;
+ case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
+ case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
+ }
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Opcode)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg()));
+
+ return;
+ }
+ case ARM::CONSTPOOL_ENTRY: {
+ /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
+ /// in the function. The first operand is the ID# for this instruction, the
+ /// second is the index into the MachineConstantPool that this is, the third
+ /// is the size in bytes of this constant pool entry.
+ /// The required alignment is specified on the basic block holding this MI.
+ unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
+ unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
+
+ // If this is the first entry of the pool, mark it.
+ if (!InConstantPool) {
+ OutStreamer->EmitDataRegion(MCDR_DataRegion);
+ InConstantPool = true;
+ }
+
+ OutStreamer->EmitLabel(GetCPISymbol(LabelId));
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
+ if (MCPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(DL, MCPE.Val.ConstVal);
+ return;
+ }
+ case ARM::JUMPTABLE_ADDRS:
+ EmitJumpTableAddrs(MI);
+ return;
+ case ARM::JUMPTABLE_INSTS:
+ EmitJumpTableInsts(MI);
+ return;
+ case ARM::JUMPTABLE_TBB:
+ case ARM::JUMPTABLE_TBH:
+ EmitJumpTableTBInst(MI, MI->getOpcode() == ARM::JUMPTABLE_TBB ? 1 : 2);
+ return;
+ case ARM::t2BR_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
+ case ARM::t2TBB_JT:
+ case ARM::t2TBH_JT: {
+ unsigned Opc = MI->getOpcode() == ARM::t2TBB_JT ? ARM::t2TBB : ARM::t2TBH;
+ // Lower and emit the PC label, then the instruction itself.
+ OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
+ case ARM::tBR_JTr:
+ case ARM::BR_JTr: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // mov pc, target
+ MCInst TmpInst;
+ unsigned Opc = MI->getOpcode() == ARM::BR_JTr ?
+ ARM::MOVr : ARM::tMOVr;
+ TmpInst.setOpcode(Opc);
+ TmpInst.addOperand(MCOperand::createReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::createReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ if (Opc == ARM::MOVr)
+ TmpInst.addOperand(MCOperand::createReg(0));
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case ARM::BR_JTm: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // ldr pc, target
+ MCInst TmpInst;
+ if (MI->getOperand(1).getReg() == 0) {
+ // literal offset
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::createReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::createImm(MI->getOperand(2).getImm()));
+ } else {
+ TmpInst.setOpcode(ARM::LDRrs);
+ TmpInst.addOperand(MCOperand::createReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::createReg(0));
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case ARM::BR_JTadd: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // add pc, target, idx
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDrr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
+ return;
+ }
+ case ARM::SPACE:
+ OutStreamer->EmitZeros(MI->getOperand(1).getImm());
+ return;
+ case ARM::TRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetMachO()) {
+ //.long 0xe7ffdefe @ trap
+ uint32_t Val = 0xe7ffdefeUL;
+ OutStreamer->AddComment("trap");
+ OutStreamer->EmitIntValue(Val, 4);
+ return;
+ }
+ break;
+ }
+ case ARM::TRAPNaCl: {
+ //.long 0xe7fedef0 @ trap
+ uint32_t Val = 0xe7fedef0UL;
+ OutStreamer->AddComment("trap");
+ OutStreamer->EmitIntValue(Val, 4);
+ return;
+ }
+ case ARM::tTRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetMachO()) {
+ //.short 57086 @ trap
+ uint16_t Val = 0xdefe;
+ OutStreamer->AddComment("trap");
+ OutStreamer->EmitIntValue(Val, 2);
+ return;
+ }
+ break;
+ }
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ case ARM::tInt_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // mov $val, pc
+ // adds $val, #7
+ // str $val, [$src, #4]
+ // movs r0, #0
+ // b LSJLJEH
+ // movs r0, #1
+ // LSJLJEH:
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+ MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", false, true);
+ OutStreamer->AddComment("eh_setjmp begin");
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
+ .addReg(ValReg)
+ .addReg(ARM::PC)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDi3)
+ .addReg(ValReg)
+ // 's' bit operand
+ .addReg(ARM::CPSR)
+ .addReg(ValReg)
+ .addImm(7)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tSTRi)
+ .addReg(ValReg)
+ .addReg(SrcReg)
+ // The offset immediate is #4. The operand value is scaled by 4 for the
+ // tSTR instruction.
+ .addImm(1)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVi8)
+ .addReg(ARM::R0)
+ .addReg(ARM::CPSR)
+ .addImm(0)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::create(Label, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tB)
+ .addExpr(SymbolExpr)
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer->AddComment("eh_setjmp end");
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVi8)
+ .addReg(ARM::R0)
+ .addReg(ARM::CPSR)
+ .addImm(1)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer->EmitLabel(Label);
+ return;
+ }
+
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ case ARM::Int_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // add $val, pc, #8
+ // str $val, [$src, #+4]
+ // mov r0, #0
+ // add pc, pc, #0
+ // mov r0, #1
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+
+ OutStreamer->AddComment("eh_setjmp begin");
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDri)
+ .addReg(ValReg)
+ .addReg(ARM::PC)
+ .addImm(8)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // 's' bit operand (always reg0 for this).
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::STRi12)
+ .addReg(ValReg)
+ .addReg(SrcReg)
+ .addImm(4)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVi)
+ .addReg(ARM::R0)
+ .addImm(0)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // 's' bit operand (always reg0 for this).
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDri)
+ .addReg(ARM::PC)
+ .addReg(ARM::PC)
+ .addImm(0)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // 's' bit operand (always reg0 for this).
+ .addReg(0));
+
+ OutStreamer->AddComment("eh_setjmp end");
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVi)
+ .addReg(ARM::R0)
+ .addImm(1)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // 's' bit operand (always reg0 for this).
+ .addReg(0));
+ return;
+ }
+ case ARM::Int_eh_sjlj_longjmp: {
+ // ldr sp, [$src, #8]
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12)
+ .addReg(ARM::SP)
+ .addReg(SrcReg)
+ .addImm(8)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ .addImm(4)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12)
+ .addReg(ARM::R7)
+ .addReg(SrcReg)
+ .addImm(0)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX)
+ .addReg(ScratchReg)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
+ case ARM::tInt_eh_sjlj_longjmp: {
+ // ldr $scratch, [$src, #8]
+ // mov sp, $scratch
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ // The offset immediate is #8. The operand value is scaled by 4 for the
+ // tLDR instruction.
+ .addImm(2)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::SP)
+ .addReg(ScratchReg)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ .addImm(1)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
+ .addReg(ARM::R7)
+ .addReg(SrcReg)
+ .addImm(0)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBX)
+ .addReg(ScratchReg)
+ // Predicate.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+
+ EmitToStreamer(*OutStreamer, TmpInst);
+}
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMAsmPrinter() {
+ RegisterAsmPrinter<ARMAsmPrinter> X(TheARMLETarget);
+ RegisterAsmPrinter<ARMAsmPrinter> Y(TheARMBETarget);
+ RegisterAsmPrinter<ARMAsmPrinter> A(TheThumbLETarget);
+ RegisterAsmPrinter<ARMAsmPrinter> B(TheThumbBETarget);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
new file mode 100644
index 0000000..ed7be2d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -0,0 +1,136 @@
+//===-- ARMAsmPrinter.h - ARM implementation of AsmPrinter ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H
+#define LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H
+
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class ARMFunctionInfo;
+class MCOperand;
+class MachineConstantPool;
+class MachineOperand;
+class MCSymbol;
+
+namespace ARM {
+ enum DW_ISA {
+ DW_ISA_ARM_thumb = 1,
+ DW_ISA_ARM_arm = 2
+ };
+}
+
+class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// AFI - Keep a pointer to ARMFunctionInfo for the current
+ /// MachineFunction.
+ ARMFunctionInfo *AFI;
+
+ /// MCP - Keep a pointer to constantpool entries of the current
+ /// MachineFunction.
+ const MachineConstantPool *MCP;
+
+ /// InConstantPool - Maintain state when emitting a sequence of constant
+ /// pool entries so we can properly mark them as data regions.
+ bool InConstantPool;
+
+ /// ThumbIndirectPads - These maintain a per-function list of jump pad
+ /// labels used for ARMv4t thumb code to make register indirect calls.
+ SmallVector<std::pair<unsigned, MCSymbol*>, 4> ThumbIndirectPads;
+
+ /// OptimizationGoals - Maintain a combined optimization goal for all
+ /// functions in a module: one of Tag_ABI_optimization_goals values,
+ /// -1 if uninitialized, 0 if conflicting goals
+ int OptimizationGoals;
+
+public:
+ explicit ARMAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer);
+
+ const char *getPassName() const override {
+ return "ARM Assembly / Object Emitter";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+
+ void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
+ const MCSubtargetInfo *EndInfo) const override;
+
+ void EmitJumpTableAddrs(const MachineInstr *MI);
+ void EmitJumpTableInsts(const MachineInstr *MI);
+ void EmitJumpTableTBInst(const MachineInstr *MI, unsigned OffsetWidth);
+ void EmitInstruction(const MachineInstr *MI) override;
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ void EmitConstantPool() override {
+ // we emit constant pools customly!
+ }
+ void EmitFunctionBodyEnd() override;
+ void EmitFunctionEntryLabel() override;
+ void EmitStartOfAsmFile(Module &M) override;
+ void EmitEndOfAsmFile(Module &M) override;
+ void EmitXXStructor(const DataLayout &DL, const Constant *CV) override;
+
+ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+
+private:
+ // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+ void emitAttributes();
+
+ // Generic helper used to emit e.g. ARMv5 mul pseudos
+ void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
+
+ void EmitUnwindingInstruction(const MachineInstr *MI);
+
+ // emitPseudoExpansionLowering - tblgen'erated.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+public:
+ unsigned getISAEncoding() override {
+ // ARM/Darwin adds ISA to the DWARF info for each function.
+ const Triple &TT = TM.getTargetTriple();
+ if (!TT.isOSBinFormatMachO())
+ return 0;
+ bool isThumb = TT.getArch() == Triple::thumb ||
+ TT.getArch() == Triple::thumbeb ||
+ TT.getSubArch() == Triple::ARMSubArch_v7m ||
+ TT.getSubArch() == Triple::ARMSubArch_v6m;
+ return isThumb ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
+ }
+
+private:
+ MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
+ MCSymbol *GetARMJTIPICJumpTableLabel(unsigned uid) const;
+
+ MCSymbol *GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags);
+
+public:
+ /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+ /// the .s file.
+ void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
new file mode 100644
index 0000000..49f3288
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -0,0 +1,4631 @@
+//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMFeatures.h"
+#include "ARMHazardRecognizer.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-instrinfo"
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "ARMGenInstrInfo.inc"
+
+static cl::opt<bool>
+EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+ cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
+static cl::opt<bool>
+WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
+ cl::desc("Widen ARM vmovs to vmovd when possible"));
+
+static cl::opt<unsigned>
+SwiftPartialUpdateClearance("swift-partial-update-clearance",
+ cl::Hidden, cl::init(12),
+ cl::desc("Clearance before partial register updates"));
+
+/// ARM_MLxEntry - Record information about MLA / MLS instructions.
+struct ARM_MLxEntry {
+ uint16_t MLxOpc; // MLA / MLS opcode
+ uint16_t MulOpc; // Expanded multiplication opcode
+ uint16_t AddSubOpc; // Expanded add / sub opcode
+ bool NegAcc; // True if the acc is negated before the add / sub.
+ bool HasLane; // True if instruction has an extra "lane" operand.
+};
+
+static const ARM_MLxEntry ARM_MLxTable[] = {
+ // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
+ // fp scalar ops
+ { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
+ { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
+ { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
+ { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
+ { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
+ { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
+
+ // fp SIMD ops
+ { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
+ { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
+ { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
+ { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
+ { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
+ { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
+ { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
+ { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
+};
+
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
+ : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ Subtarget(STI) {
+ for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
+ if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
+ llvm_unreachable("Duplicated entries?");
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
+ }
+}
+
+// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
+// currently defaults to no prepass hazard recognizer.
+ScheduleHazardRecognizer *
+ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const {
+ if (usePreRAHazardRecognizer()) {
+ const InstrItineraryData *II =
+ static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
+ return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
+ }
+ return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
+}
+
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+ return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
+ return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
+
+MachineInstr *
+ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ // FIXME: Thumb2 support.
+
+ if (!EnableARM3Addr)
+ return nullptr;
+
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ uint64_t TSFlags = MI->getDesc().TSFlags;
+ bool isPre = false;
+ switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+ default: return nullptr;
+ case ARMII::IndexModePre:
+ isPre = true;
+ break;
+ case ARMII::IndexModePost:
+ break;
+ }
+
+ // Try splitting an indexed load/store to an un-indexed one plus an add/sub
+ // operation.
+ unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ if (MemOpc == 0)
+ return nullptr;
+
+ MachineInstr *UpdateMI = nullptr;
+ MachineInstr *MemMI = nullptr;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ bool isLoad = !MI->mayStore();
+ const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+ const MachineOperand &Base = MI->getOperand(2);
+ const MachineOperand &Offset = MI->getOperand(NumOps-3);
+ unsigned WBReg = WB.getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned OffReg = Offset.getReg();
+ unsigned OffImm = MI->getOperand(NumOps-2).getImm();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
+ switch (AddrMode) {
+ default: llvm_unreachable("Unknown indexed op!");
+ case ARMII::AddrMode2: {
+ bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ if (OffReg == 0) {
+ if (ARM_AM::getSOImmVal(Amt) == -1)
+ // Can't encode it in a so_imm operand. This transformation will
+ // add more than 1 instruction. Abandon!
+ return nullptr;
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else if (Amt != 0) {
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
+ .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ case ARMII::AddrMode3 : {
+ bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+ if (OffReg == 0)
+ // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ }
+
+ std::vector<MachineInstr*> NewMIs;
+ if (isPre) {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(WBReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ NewMIs.push_back(MemMI);
+ NewMIs.push_back(UpdateMI);
+ } else {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(BaseReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ if (WB.isDead())
+ UpdateMI->getOperand(0).setIsDead();
+ NewMIs.push_back(UpdateMI);
+ NewMIs.push_back(MemMI);
+ }
+
+ // Transfer LiveVariables states, kill / dead info.
+ if (LV) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+
+ LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+ if (MO.isDef()) {
+ MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+ if (MO.isDead())
+ LV->addVirtualRegisterDead(Reg, NewMI);
+ }
+ if (MO.isUse() && MO.isKill()) {
+ for (unsigned j = 0; j < 2; ++j) {
+ // Look at the two new MI's in reverse order.
+ MachineInstr *NewMI = NewMIs[j];
+ if (!NewMI->readsRegister(Reg))
+ continue;
+ LV->addVirtualRegisterKilled(Reg, NewMI);
+ if (VI.removeKill(MI))
+ VI.Kills.push_back(NewMI);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ MFI->insert(MBBI, NewMIs[1]);
+ MFI->insert(MBBI, NewMIs[0]);
+ return NewMIs[0];
+}
+
+// Branch analysis.
+bool
+ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ TBB = nullptr;
+ FBB = nullptr;
+
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false; // Empty blocks are easy.
+ --I;
+
+ // Walk backwards from the end of the basic block until the branch is
+ // analyzed or we give up.
+ while (isPredicated(I) || I->isTerminator() || I->isDebugValue()) {
+
+ // Flag to be raised on unanalyzeable instructions. This is useful in cases
+ // where we want to clean up on the end of the basic block before we bail
+ // out.
+ bool CantAnalyze = false;
+
+ // Skip over DEBUG values and predicated nonterminators.
+ while (I->isDebugValue() || !I->isTerminator()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+
+ if (isIndirectBranchOpcode(I->getOpcode()) ||
+ isJumpTableBranchOpcode(I->getOpcode())) {
+ // Indirect branches and jump tables can't be analyzed, but we still want
+ // to clean up any instructions at the tail of the basic block.
+ CantAnalyze = true;
+ } else if (isUncondBranchOpcode(I->getOpcode())) {
+ TBB = I->getOperand(0).getMBB();
+ } else if (isCondBranchOpcode(I->getOpcode())) {
+ // Bail out if we encounter multiple conditional branches.
+ if (!Cond.empty())
+ return true;
+
+ assert(!FBB && "FBB should have been null.");
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(I->getOperand(1));
+ Cond.push_back(I->getOperand(2));
+ } else if (I->isReturn()) {
+ // Returns can't be analyzed, but we should run cleanup.
+ CantAnalyze = !isPredicated(I);
+ } else {
+ // We encountered other unrecognized terminator. Bail out immediately.
+ return true;
+ }
+
+ // Cleanup code - to be run for unpredicated unconditional branches and
+ // returns.
+ if (!isPredicated(I) &&
+ (isUncondBranchOpcode(I->getOpcode()) ||
+ isIndirectBranchOpcode(I->getOpcode()) ||
+ isJumpTableBranchOpcode(I->getOpcode()) ||
+ I->isReturn())) {
+ // Forget any previous condition branch information - it no longer applies.
+ Cond.clear();
+ FBB = nullptr;
+
+ // If we can modify the function, delete everything below this
+ // unconditional branch.
+ if (AllowModify) {
+ MachineBasicBlock::iterator DI = std::next(I);
+ while (DI != MBB.end()) {
+ MachineInstr *InstToDelete = DI;
+ ++DI;
+ InstToDelete->eraseFromParent();
+ }
+ }
+ }
+
+ if (CantAnalyze)
+ return true;
+
+ if (I == MBB.begin())
+ return false;
+
+ --I;
+ }
+
+ // We made it past the terminators without bailing out - we must have
+ // analyzed this branch successfully.
+ return false;
+}
+
+
+unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return 0;
+
+ if (!isUncondBranchOpcode(I->getOpcode()) &&
+ !isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond,
+ DebugLoc DL) const {
+ ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
+ int BOpc = !AFI->isThumbFunction()
+ ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
+ int BccOpc = !AFI->isThumbFunction()
+ ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
+ bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "ARM branch conditions have two components!");
+
+ // For conditional branches, we use addOperand to preserve CPSR flags.
+
+ if (!FBB) {
+ if (Cond.empty()) { // Unconditional branch?
+ if (isThumb)
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addOperand(Cond[1]);
+ return 1;
+ }
+
+ // Two-way conditional branch.
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addOperand(Cond[1]);
+ if (isThumb)
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+ return 2;
+}
+
+bool ARMBaseInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+}
+
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
+ if (MI->isBundle()) {
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ int PIdx = I->findFirstPredOperandIdx();
+ if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
+ return true;
+ }
+ return false;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
+bool ARMBaseInstrInfo::
+PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Pred) const {
+ unsigned Opc = MI->getOpcode();
+ if (isUncondBranchOpcode(Opc)) {
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg());
+ return true;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setImm(Pred[0].getImm());
+ MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+ return true;
+ }
+ return false;
+}
+
+bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const {
+ if (Pred1.size() > 2 || Pred2.size() > 2)
+ return false;
+
+ ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
+ ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
+ if (CC1 == CC2)
+ return true;
+
+ switch (CC1) {
+ default:
+ return false;
+ case ARMCC::AL:
+ return true;
+ case ARMCC::HS:
+ return CC2 == ARMCC::HI;
+ case ARMCC::LS:
+ return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+ case ARMCC::GE:
+ return CC2 == ARMCC::GT;
+ case ARMCC::LE:
+ return CC2 == ARMCC::LT;
+ }
+}
+
+bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
+ (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+
+ return Found;
+}
+
+static bool isCPSRDefined(const MachineInstr *MI) {
+ for (const auto &MO : MI->operands())
+ if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
+ return true;
+ return false;
+}
+
+static bool isEligibleForITBlock(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return true;
+ case ARM::tADC: // ADC (register) T1
+ case ARM::tADDi3: // ADD (immediate) T1
+ case ARM::tADDi8: // ADD (immediate) T2
+ case ARM::tADDrr: // ADD (register) T1
+ case ARM::tAND: // AND (register) T1
+ case ARM::tASRri: // ASR (immediate) T1
+ case ARM::tASRrr: // ASR (register) T1
+ case ARM::tBIC: // BIC (register) T1
+ case ARM::tEOR: // EOR (register) T1
+ case ARM::tLSLri: // LSL (immediate) T1
+ case ARM::tLSLrr: // LSL (register) T1
+ case ARM::tLSRri: // LSR (immediate) T1
+ case ARM::tLSRrr: // LSR (register) T1
+ case ARM::tMUL: // MUL T1
+ case ARM::tMVN: // MVN (register) T1
+ case ARM::tORR: // ORR (register) T1
+ case ARM::tROR: // ROR (register) T1
+ case ARM::tRSB: // RSB (immediate) T1
+ case ARM::tSBC: // SBC (register) T1
+ case ARM::tSUBi3: // SUB (immediate) T1
+ case ARM::tSUBi8: // SUB (immediate) T2
+ case ARM::tSUBrr: // SUB (register) T1
+ return !isCPSRDefined(MI);
+ }
+}
+
+/// isPredicable - Return true if the specified instruction can be predicated.
+/// By default, this returns true for every instruction with a
+/// PredicateOperand.
+bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
+ if (!MI->isPredicable())
+ return false;
+
+ if (!isEligibleForITBlock(MI))
+ return false;
+
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+
+ if (AFI->isThumb2Function()) {
+ if (getSubtarget().restrictIT())
+ return isV8EligibleForIT(MI);
+ } else { // non-Thumb
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
+ return false;
+ }
+
+ return true;
+}
+
+namespace llvm {
+template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+ if (!MO.isDead())
+ return false;
+ }
+ // all definitions of CPSR are dead
+ return true;
+}
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getSize())
+ return MCID.getSize();
+
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ // pseudo-instruction sizes are zero.
+ return 0;
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel:
+ return 4;
+ case ARM::MOVi32imm:
+ case ARM::t2MOVi32imm:
+ return 8;
+ case ARM::CONSTPOOL_ENTRY:
+ case ARM::JUMPTABLE_INSTS:
+ case ARM::JUMPTABLE_ADDRS:
+ case ARM::JUMPTABLE_TBB:
+ case ARM::JUMPTABLE_TBH:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ return 20;
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return 12;
+ case ARM::SPACE:
+ return MI->getOperand(1).getImm();
+ }
+}
+
+unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += GetInstSizeInBytes(&*I);
+ }
+ return Size;
+}
+
+void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, bool KillSrc,
+ const ARMSubtarget &Subtarget) const {
+ unsigned Opc = Subtarget.isThumb()
+ ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
+ : ARM::MRS;
+
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
+
+ // There is only 1 A/R class MRS instruction, and it always refers to
+ // APSR. However, there are lots of other possibilities on M-class cores.
+ if (Subtarget.isMClass())
+ MIB.addImm(0x800);
+
+ AddDefaultPred(MIB);
+
+ MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
+}
+
+void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool KillSrc,
+ const ARMSubtarget &Subtarget) const {
+ unsigned Opc = Subtarget.isThumb()
+ ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
+ : ARM::MSR;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
+
+ if (Subtarget.isMClass())
+ MIB.addImm(0x800);
+ else
+ MIB.addImm(8);
+
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+
+ AddDefaultPred(MIB);
+
+ MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
+}
+
+void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GPRDest = ARM::GPRRegClass.contains(DestReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+
+ if (GPRDest && GPRSrc) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))));
+ return;
+ }
+
+ bool SPRDest = ARM::SPRRegClass.contains(DestReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+
+ unsigned Opc = 0;
+ if (SPRDest && SPRSrc)
+ Opc = ARM::VMOVS;
+ else if (GPRDest && SPRSrc)
+ Opc = ARM::VMOVRS;
+ else if (SPRDest && GPRSrc)
+ Opc = ARM::VMOVSR;
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
+ Opc = ARM::VMOVD;
+ else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq;
+
+ if (Opc) {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (Opc == ARM::VORRq)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPred(MIB);
+ return;
+ }
+
+ // Handle register classes that require multiple instructions.
+ unsigned BeginIdx = 0;
+ unsigned SubRegs = 0;
+ int Spacing = 1;
+
+ // Use VORRq when possible.
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VORRq;
+ BeginIdx = ARM::qsub_0;
+ SubRegs = 2;
+ } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VORRq;
+ BeginIdx = ARM::qsub_0;
+ SubRegs = 4;
+ // Fall back to VMOVD.
+ } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 2;
+ } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 3;
+ } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 4;
+ } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
+ Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
+ BeginIdx = ARM::gsub_0;
+ SubRegs = 2;
+ } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 2;
+ Spacing = 2;
+ } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 3;
+ Spacing = 2;
+ } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 4;
+ Spacing = 2;
+ } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
+ Opc = ARM::VMOVS;
+ BeginIdx = ARM::ssub_0;
+ SubRegs = 2;
+ } else if (SrcReg == ARM::CPSR) {
+ copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
+ return;
+ } else if (DestReg == ARM::CPSR) {
+ copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
+ return;
+ }
+
+ assert(Opc && "Impossible reg-to-reg copy");
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineInstrBuilder Mov;
+
+ // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
+ if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
+ BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
+ Spacing = -Spacing;
+ }
+#ifndef NDEBUG
+ SmallSet<unsigned, 4> DstRegs;
+#endif
+ for (unsigned i = 0; i != SubRegs; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
+ assert(Dst && Src && "Bad sub-register");
+#ifndef NDEBUG
+ assert(!DstRegs.count(Src) && "destructive vector copy");
+ DstRegs.insert(Dst);
+#endif
+ Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
+ // VORR takes two source operands.
+ if (Opc == ARM::VORRq)
+ Mov.addReg(Src);
+ Mov = AddDefaultPred(Mov);
+ // MOVr can set CC.
+ if (Opc == ARM::MOVr)
+ Mov = AddDefaultCC(Mov);
+ }
+ // Add implicit super-register defs and kills to the last instruction.
+ Mov->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ Mov->addRegisterKilled(SrcReg, TRI);
+}
+
+const MachineInstrBuilder &
+ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
+ unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) const {
+ if (!SubIdx)
+ return MIB.addReg(Reg, State);
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(Reg, State, SubIdx);
+}
+
+void ARMBaseInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), Align);
+
+ switch (RC->getSize()) {
+ case 4:
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 8:
+ if (ARM::DPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ if (Subtarget.hasV5TEOps()) {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
+
+ AddDefaultPred(MIB);
+ } else {
+ // Fallback to STM instruction, which has existed since the dawn of
+ // time.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
+ .addFrameIndex(FI).addMemOperand(MMO));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 16:
+ if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ // FIXME: It's possible to only store part of the QQ register if the
+ // spilled def has a sub-register index.
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 64:
+ if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ default:
+ llvm_unreachable("Unknown reg class!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::STRrs:
+ case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::STRi12:
+ case ARM::t2STRi12:
+ case ARM::tSTRspi:
+ case ARM::VSTRD:
+ case ARM::VSTRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VST1q64:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST1d64QPseudo:
+ if (MI->getOperand(0).isFI() &&
+ MI->getOperand(2).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ break;
+ case ARM::VSTMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ const MachineMemOperand *Dummy;
+ return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+}
+
+void ARMBaseInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), Align);
+
+ switch (RC->getSize()) {
+ case 4:
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+
+ } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 8:
+ if (ARM::DPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB;
+
+ if (Subtarget.hasV5TEOps()) {
+ MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
+ AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
+
+ AddDefaultPred(MIB);
+ } else {
+ // Fallback to LDM instruction, which has existed since the dawn of
+ // time.
+ MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
+ .addFrameIndex(FI).addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ }
+
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 16:
+ if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 64:
+ if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ default:
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::LDRi12:
+ case ARM::t2LDRi12:
+ case ARM::tLDRspi:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLD1q64:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64QPseudo:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLDMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ const MachineMemOperand *Dummy;
+ return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+}
+
+/// \brief Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
+/// depending on whether the result is used.
+void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MBBI) const {
+ bool isThumb1 = Subtarget.isThumb1Only();
+ bool isThumb2 = Subtarget.isThumb2();
+ const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
+
+ MachineInstr *MI = MBBI;
+ DebugLoc dl = MI->getDebugLoc();
+ MachineBasicBlock *BB = MI->getParent();
+
+ MachineInstrBuilder LDM, STM;
+ if (isThumb1 || !MI->getOperand(1).isDead()) {
+ LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
+ : isThumb1 ? ARM::tLDMIA_UPD
+ : ARM::LDMIA_UPD))
+ .addOperand(MI->getOperand(1));
+ } else {
+ LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
+ }
+
+ if (isThumb1 || !MI->getOperand(0).isDead()) {
+ STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
+ : isThumb1 ? ARM::tSTMIA_UPD
+ : ARM::STMIA_UPD))
+ .addOperand(MI->getOperand(0));
+ } else {
+ STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
+ }
+
+ AddDefaultPred(LDM.addOperand(MI->getOperand(3)));
+ AddDefaultPred(STM.addOperand(MI->getOperand(2)));
+
+ // Sort the scratch registers into ascending order.
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+ llvm::SmallVector<unsigned, 6> ScratchRegs;
+ for(unsigned I = 5; I < MI->getNumOperands(); ++I)
+ ScratchRegs.push_back(MI->getOperand(I).getReg());
+ std::sort(ScratchRegs.begin(), ScratchRegs.end(),
+ [&TRI](const unsigned &Reg1,
+ const unsigned &Reg2) -> bool {
+ return TRI.getEncodingValue(Reg1) <
+ TRI.getEncodingValue(Reg2);
+ });
+
+ for (const auto &Reg : ScratchRegs) {
+ LDM.addReg(Reg, RegState::Define);
+ STM.addReg(Reg, RegState::Kill);
+ }
+
+ BB->erase(MBBI);
+}
+
+
+bool
+ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ Reloc::Model RM = MF.getTarget().getRelocationModel();
+
+ if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
+ assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
+ "LOAD_STACK_GUARD currently supported only for MachO.");
+ expandLoadStackGuard(MI, RM);
+ MI->getParent()->erase(MI);
+ return true;
+ }
+
+ if (MI->getOpcode() == ARM::MEMCPY) {
+ expandMEMCPY(MI);
+ return true;
+ }
+
+ // This hook gets to expand COPY instructions before they become
+ // copyPhysReg() calls. Look for VMOVS instructions that can legally be
+ // widened to VMOVD. We prefer the VMOVD when possible because it may be
+ // changed into a VORR that can go down the NEON pipeline.
+ if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() ||
+ Subtarget.isFPOnlySP())
+ return false;
+
+ // Look for a copy between even S-registers. That is where we keep floats
+ // when using NEON v2f32 instructions for f32 arithmetic.
+ unsigned DstRegS = MI->getOperand(0).getReg();
+ unsigned SrcRegS = MI->getOperand(1).getReg();
+ if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
+ return false;
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ if (!DstRegD || !SrcRegD)
+ return false;
+
+ // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
+ // legal if the COPY already defines the full DstRegD, and it isn't a
+ // sub-register insertion.
+ if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
+ return false;
+
+ // A dead copy shouldn't show up here, but reject it just in case.
+ if (MI->getOperand(0).isDead())
+ return false;
+
+ // All clear, widen the COPY.
+ DEBUG(dbgs() << "widening: " << *MI);
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+
+ // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
+ // or some other super-register.
+ int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
+ if (ImpDefIdx != -1)
+ MI->RemoveOperand(ImpDefIdx);
+
+ // Change the opcode and operands.
+ MI->setDesc(get(ARM::VMOVD));
+ MI->getOperand(0).setReg(DstRegD);
+ MI->getOperand(1).setReg(SrcRegD);
+ AddDefaultPred(MIB);
+
+ // We are now reading SrcRegD instead of SrcRegS. This may upset the
+ // register scavenger and machine verifier, so we need to indicate that we
+ // are reading an undefined value from SrcRegD, but a proper value from
+ // SrcRegS.
+ MI->getOperand(1).setIsUndef();
+ MIB.addReg(SrcRegS, RegState::Implicit);
+
+ // SrcRegD may actually contain an unrelated value in the ssub_1
+ // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
+ if (MI->getOperand(1).isKill()) {
+ MI->getOperand(1).setIsKill(false);
+ MI->addRegisterKilled(SrcRegS, TRI, true);
+ }
+
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+}
+
+/// Create a copy of a const pool value. Update CPI to the new index and return
+/// the label UID.
+static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
+ MachineConstantPool *MCP = MF.getConstantPool();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+ assert(MCPE.isMachineConstantPoolEntry() &&
+ "Expecting a machine constantpool entry!");
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ unsigned PCLabelId = AFI->createPICLabelUId();
+ ARMConstantPoolValue *NewCPV = nullptr;
+
+ // FIXME: The below assumes PIC relocation model and that the function
+ // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
+ // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
+ // instructions, so that's probably OK, but is PIC always correct when
+ // we get here?
+ if (ACPV->isGlobalValue())
+ NewCPV = ARMConstantPoolConstant::Create(
+ cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
+ 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
+ else if (ACPV->isExtSymbol())
+ NewCPV = ARMConstantPoolSymbol::
+ Create(MF.getFunction()->getContext(),
+ cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
+ else if (ACPV->isBlockAddress())
+ NewCPV = ARMConstantPoolConstant::
+ Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
+ else if (ACPV->isLSDA())
+ NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
+ else if (ACPV->isMachineBasicBlock())
+ NewCPV = ARMConstantPoolMBB::
+ Create(MF.getFunction()->getContext(),
+ cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
+ else
+ llvm_unreachable("Unexpected ARM constantpool value type!!");
+ CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ return PCLabelId;
+}
+
+void ARMBaseInstrInfo::
+reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ unsigned Opcode = Orig->getOpcode();
+ switch (Opcode) {
+ default: {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+ break;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ MachineFunction &MF = *MBB.getParent();
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
+ DestReg)
+ .addConstantPoolIndex(CPI).addImm(PCLabelId);
+ MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
+ break;
+ }
+ }
+}
+
+MachineInstr *
+ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
+ MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
+ switch(Orig->getOpcode()) {
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ Orig->getOperand(1).setIndex(CPI);
+ Orig->getOperand(2).setImm(PCLabelId);
+ break;
+ }
+ }
+ return MI;
+}
+
+bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ unsigned Opcode = MI0->getOpcode();
+ if (Opcode == ARM::t2LDRpci ||
+ Opcode == ARM::t2LDRpci_pic ||
+ Opcode == ARM::tLDRpci ||
+ Opcode == ARM::tLDRpci_pic ||
+ Opcode == ARM::LDRLIT_ga_pcrel ||
+ Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
+ Opcode == ARM::tLDRLIT_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_pcrel) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ const MachineOperand &MO0 = MI0->getOperand(1);
+ const MachineOperand &MO1 = MI1->getOperand(1);
+ if (MO0.getOffset() != MO1.getOffset())
+ return false;
+
+ if (Opcode == ARM::LDRLIT_ga_pcrel ||
+ Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
+ Opcode == ARM::tLDRLIT_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_pcrel)
+ // Ignore the PC labels.
+ return MO0.getGlobal() == MO1.getGlobal();
+
+ const MachineFunction *MF = MI0->getParent()->getParent();
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ int CPI0 = MO0.getIndex();
+ int CPI1 = MO1.getIndex();
+ const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
+ const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
+ bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
+ bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
+ if (isARMCP0 && isARMCP1) {
+ ARMConstantPoolValue *ACPV0 =
+ static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
+ ARMConstantPoolValue *ACPV1 =
+ static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
+ return ACPV0->hasSameValue(ACPV1);
+ } else if (!isARMCP0 && !isARMCP1) {
+ return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
+ }
+ return false;
+ } else if (Opcode == ARM::PICLDR) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ unsigned Addr0 = MI0->getOperand(1).getReg();
+ unsigned Addr1 = MI1->getOperand(1).getReg();
+ if (Addr0 != Addr1) {
+ if (!MRI ||
+ !TargetRegisterInfo::isVirtualRegister(Addr0) ||
+ !TargetRegisterInfo::isVirtualRegister(Addr1))
+ return false;
+
+ // This assumes SSA form.
+ MachineInstr *Def0 = MRI->getVRegDef(Addr0);
+ MachineInstr *Def1 = MRI->getVRegDef(Addr1);
+ // Check if the loaded value, e.g. a constantpool of a global address, are
+ // the same.
+ if (!produceSameValue(Def0, Def1, MRI))
+ return false;
+ }
+
+ for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
+ // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
+ const MachineOperand &MO0 = MI0->getOperand(i);
+ const MachineOperand &MO1 = MI1->getOperand(i);
+ if (!MO0.isIdenticalTo(MO1))
+ return false;
+ }
+ return true;
+ }
+
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+/// determine if two loads are loading from the same base address. It should
+/// only return true if the base pointers are the same and the only differences
+/// between the two addresses is the offset. It also returns the offsets by
+/// reference.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
+bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+
+ switch (Load1->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ switch (Load2->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ // Check if base addresses and chain operands match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+
+ // Index should be Reg0.
+ if (Load1->getOperand(3) != Load2->getOperand(3))
+ return false;
+
+ // Determine the offsets.
+ if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+ isa<ConstantSDNode>(Load2->getOperand(1))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled togther. On some targets if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ assert(Offset2 > Offset1);
+
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ // Check if the machine opcodes are different. If they are different
+ // then we consider them to not be of the same base address,
+ // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
+ // In this case, they are considered to be the same because they are different
+ // encoding forms of the same basic instruction.
+ if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
+ !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
+ Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
+ (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
+ Load2->getMachineOpcode() == ARM::t2LDRBi8)))
+ return false; // FIXME: overly conservative?
+
+ // Four loads in a row should be sufficient.
+ if (NumLoads >= 3)
+ return false;
+
+ return true;
+}
+
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. It's necessary to be explicit
+ // due to the special treatment of IT instructions below, otherwise a
+ // dbg_value followed by an IT will result in the IT instruction being
+ // considered a scheduling hazard, which is wrong. It should be the actual
+ // instruction preceding the dbg_value instruction(s), just like it is
+ // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->isTerminator() || MI->isPosition())
+ return true;
+
+ // Treat the start of the IT block as a scheduling boundary, but schedule
+ // t2IT along with all instructions following it.
+ // FIXME: This is a big hammer. But the alternative is to add all potential
+ // true and anti dependencies to IT block instructions as implicit operands
+ // to the t2IT instruction. The added compile time and complexity does not
+ // seem worth it.
+ MachineBasicBlock::const_iterator I = MI;
+ // Make sure to skip any dbg_value instructions
+ while (++I != MBB->end() && I->isDebugValue())
+ ;
+ if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ // Calls don't actually change the stack pointer, even if they have imp-defs.
+ // No ARM calling conventions change the stack pointer. (X86 calling
+ // conventions sometimes do).
+ if (!MI->isCall() && MI->definesRegister(ARM::SP))
+ return true;
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ BranchProbability Probability) const {
+ if (!NumCycles)
+ return false;
+
+ // If we are optimizing for size, see if the branch in the predecessor can be
+ // lowered to cbn?z by the constant island lowering pass, and return false if
+ // so. This results in a shorter instruction sequence.
+ if (MBB.getParent()->getFunction()->optForSize()) {
+ MachineBasicBlock *Pred = *MBB.pred_begin();
+ if (!Pred->empty()) {
+ MachineInstr *LastMI = &*Pred->rbegin();
+ if (LastMI->getOpcode() == ARM::t2Bcc) {
+ MachineBasicBlock::iterator CmpMI = LastMI;
+ if (CmpMI != Pred->begin()) {
+ --CmpMI;
+ if (CmpMI->getOpcode() == ARM::tCMPi8 ||
+ CmpMI->getOpcode() == ARM::t2CMPri) {
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes P = getInstrPredicate(CmpMI, PredReg);
+ if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
+ isARMLowRegister(Reg))
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ // Here we scale up each component of UnpredCost to avoid precision issue when
+ // scaling NumCycles by Probability.
+ const unsigned ScalingUpFactor = 1024;
+ unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor);
+ UnpredCost += ScalingUpFactor; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+
+ return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned TCycles, unsigned TExtra,
+ MachineBasicBlock &FMBB,
+ unsigned FCycles, unsigned FExtra,
+ BranchProbability Probability) const {
+ if (!TCycles || !FCycles)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ // Here we scale up each component of UnpredCost to avoid precision issue when
+ // scaling TCycles/FCycles by Probability.
+ const unsigned ScalingUpFactor = 1024;
+ unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
+ unsigned FUnpredCost =
+ Probability.getCompl().scale(FCycles * ScalingUpFactor);
+ unsigned UnpredCost = TUnpredCost + FUnpredCost;
+ UnpredCost += 1 * ScalingUpFactor; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+
+ return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
+}
+
+bool
+ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ // Reduce false anti-dependencies to let Swift's out-of-order execution
+ // engine do its thing.
+ return Subtarget.isSwift();
+}
+
+/// getInstrPredicate - If instruction is predicated, returns its predicate
+/// condition, otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes
+llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx == -1) {
+ PredReg = 0;
+ return ARMCC::AL;
+ }
+
+ PredReg = MI->getOperand(PIdx+1).getReg();
+ return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
+}
+
+
+unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
+ if (Opc == ARM::B)
+ return ARM::Bcc;
+ if (Opc == ARM::tB)
+ return ARM::tBcc;
+ if (Opc == ARM::t2B)
+ return ARM::t2Bcc;
+
+ llvm_unreachable("Unknown unconditional branch opcode!");
+}
+
+MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
+ switch (MI->getOpcode()) {
+ case ARM::MOVCCr:
+ case ARM::t2MOVCCr: {
+ // MOVCC can be commuted by inverting the condition.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
+ // MOVCC AL can't be inverted. Shouldn't happen.
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return nullptr;
+ MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+ if (!MI)
+ return nullptr;
+ // After swapping the MOVCC operands, also invert the condition.
+ MI->getOperand(MI->findFirstPredOperandIdx())
+ .setImm(ARMCC::getOppositeCondition(CC));
+ return MI;
+ }
+ }
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+}
+
+/// Identify instructions that can be folded into a MOVCC instruction, and
+/// return the defining instruction.
+static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return nullptr;
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return nullptr;
+ MachineInstr *MI = MRI.getVRegDef(Reg);
+ if (!MI)
+ return nullptr;
+ // MI is folded into the MOVCC by predicating it.
+ if (!MI->isPredicable())
+ return nullptr;
+ // Check if MI has any non-dead defs or physreg uses. This also detects
+ // predicated instructions which will be reading CPSR.
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Reject frame index operands, PEI can't handle the predicated pseudos.
+ if (MO.isFI() || MO.isCPI() || MO.isJTI())
+ return nullptr;
+ if (!MO.isReg())
+ continue;
+ // MI can't have any tied operands, that would conflict with predication.
+ if (MO.isTied())
+ return nullptr;
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ return nullptr;
+ if (MO.isDef() && !MO.isDead())
+ return nullptr;
+ }
+ bool DontMoveAcrossStores = true;
+ if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
+ return nullptr;
+ return MI;
+}
+
+bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &TrueOp, unsigned &FalseOp,
+ bool &Optimizable) const {
+ assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+ "Unknown select instruction");
+ // MOVCC operands:
+ // 0: Def.
+ // 1: True use.
+ // 2: False use.
+ // 3: Condition code.
+ // 4: CPSR use.
+ TrueOp = 1;
+ FalseOp = 2;
+ Cond.push_back(MI->getOperand(3));
+ Cond.push_back(MI->getOperand(4));
+ // We can always fold a def.
+ Optimizable = true;
+ return false;
+}
+
+MachineInstr *
+ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+ bool PreferFalse) const {
+ assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+ "Unknown select instruction");
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
+ bool Invert = !DefMI;
+ if (!DefMI)
+ DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
+ if (!DefMI)
+ return nullptr;
+
+ // Find new register class to use.
+ MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
+ unsigned DestReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
+ if (!MRI.constrainRegClass(DestReg, PreviousClass))
+ return nullptr;
+
+ // Create a new predicated version of DefMI.
+ // Rfalse is the first use.
+ MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ DefMI->getDesc(), DestReg);
+
+ // Copy all the DefMI operands, excluding its (null) predicate.
+ const MCInstrDesc &DefDesc = DefMI->getDesc();
+ for (unsigned i = 1, e = DefDesc.getNumOperands();
+ i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
+ NewMI.addOperand(DefMI->getOperand(i));
+
+ unsigned CondCode = MI->getOperand(3).getImm();
+ if (Invert)
+ NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
+ else
+ NewMI.addImm(CondCode);
+ NewMI.addOperand(MI->getOperand(4));
+
+ // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
+ if (NewMI->hasOptionalDef())
+ AddDefaultCC(NewMI);
+
+ // The output register value when the predicate is false is an implicit
+ // register operand tied to the first def.
+ // The tie makes the register allocator ensure the FalseReg is allocated the
+ // same register as operand 0.
+ FalseReg.setImplicit();
+ NewMI.addOperand(FalseReg);
+ NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
+
+ // Update SeenMIs set: register newly created MI and erase removed DefMI.
+ SeenMIs.insert(NewMI);
+ SeenMIs.erase(DefMI);
+
+ // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
+ // DefMI would be invalid when tranferred inside the loop. Checking for a
+ // loop is expensive, but at least remove kill flags if they are in different
+ // BBs.
+ if (DefMI->getParent() != MI->getParent())
+ NewMI->clearKillInfo();
+
+ // The caller will erase MI, but not DefMI.
+ DefMI->eraseFromParent();
+ return NewMI;
+}
+
+/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
+/// instruction is encoded with an 'S' bit is determined by the optional CPSR
+/// def operand.
+///
+/// This will go away once we can teach tblgen how to set the optional CPSR def
+/// operand itself.
+struct AddSubFlagsOpcodePair {
+ uint16_t PseudoOpc;
+ uint16_t MachineOpc;
+};
+
+static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+ {ARM::ADDSri, ARM::ADDri},
+ {ARM::ADDSrr, ARM::ADDrr},
+ {ARM::ADDSrsi, ARM::ADDrsi},
+ {ARM::ADDSrsr, ARM::ADDrsr},
+
+ {ARM::SUBSri, ARM::SUBri},
+ {ARM::SUBSrr, ARM::SUBrr},
+ {ARM::SUBSrsi, ARM::SUBrsi},
+ {ARM::SUBSrsr, ARM::SUBrsr},
+
+ {ARM::RSBSri, ARM::RSBri},
+ {ARM::RSBSrsi, ARM::RSBrsi},
+ {ARM::RSBSrsr, ARM::RSBrsr},
+
+ {ARM::t2ADDSri, ARM::t2ADDri},
+ {ARM::t2ADDSrr, ARM::t2ADDrr},
+ {ARM::t2ADDSrs, ARM::t2ADDrs},
+
+ {ARM::t2SUBSri, ARM::t2SUBri},
+ {ARM::t2SUBSrr, ARM::t2SUBrr},
+ {ARM::t2SUBSrs, ARM::t2SUBrs},
+
+ {ARM::t2RSBSri, ARM::t2RSBri},
+ {ARM::t2RSBSrs, ARM::t2RSBrs},
+};
+
+unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
+ for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
+ if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
+ return AddSubFlagsOpcodeMap[i].MachineOpc;
+ return 0;
+}
+
+void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ if (NumBytes == 0 && DestReg != BaseReg) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ return;
+ }
+
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ while (NumBytes) {
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
+ unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+ assert(ThisVal && "Didn't extract field correctly");
+
+ // We will handle these bits from offset, clear them.
+ NumBytes &= ~ThisVal;
+
+ assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
+
+ // Build the new ADD / SUB.
+ unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ BaseReg = DestReg;
+ }
+}
+
+bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
+ MachineFunction &MF, MachineInstr *MI,
+ unsigned NumBytes) {
+ // This optimisation potentially adds lots of load and store
+ // micro-operations, it's only really a great benefit to code-size.
+ if (!MF.getFunction()->optForMinSize())
+ return false;
+
+ // If only one register is pushed/popped, LLVM can use an LDR/STR
+ // instead. We can't modify those so make sure we're dealing with an
+ // instruction we understand.
+ bool IsPop = isPopOpcode(MI->getOpcode());
+ bool IsPush = isPushOpcode(MI->getOpcode());
+ if (!IsPush && !IsPop)
+ return false;
+
+ bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
+ MI->getOpcode() == ARM::VLDMDIA_UPD;
+ bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
+ MI->getOpcode() == ARM::tPOP ||
+ MI->getOpcode() == ARM::tPOP_RET;
+
+ assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getOperand(1).getReg() == ARM::SP)) &&
+ "trying to fold sp update into non-sp-updating push/pop");
+
+ // The VFP push & pop act on D-registers, so we can only fold an adjustment
+ // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try
+ // if this is violated.
+ if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
+ return false;
+
+ // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
+ // pred) so the list starts at 4. Thumb1 starts after the predicate.
+ int RegListIdx = IsT1PushPop ? 2 : 4;
+
+ // Calculate the space we'll need in terms of registers.
+ unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
+ unsigned RD0Reg, RegsNeeded;
+ if (IsVFPPushPop) {
+ RD0Reg = ARM::D0;
+ RegsNeeded = NumBytes / 8;
+ } else {
+ RD0Reg = ARM::R0;
+ RegsNeeded = NumBytes / 4;
+ }
+
+ // We're going to have to strip all list operands off before
+ // re-adding them since the order matters, so save the existing ones
+ // for later.
+ SmallVector<MachineOperand, 4> RegList;
+ for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
+ RegList.push_back(MI->getOperand(i));
+
+ const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+
+ // Now try to find enough space in the reglist to allocate NumBytes.
+ for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
+ --CurReg) {
+ if (!IsPop) {
+ // Pushing any register is completely harmless, mark the
+ // register involved as undef since we don't care about it in
+ // the slightest.
+ RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
+ false, false, true));
+ --RegsNeeded;
+ continue;
+ }
+
+ // However, we can only pop an extra register if it's not live. For
+ // registers live within the function we might clobber a return value
+ // register; the other way a register can be live here is if it's
+ // callee-saved.
+ if (isCalleeSavedRegister(CurReg, CSRegs) ||
+ MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
+ MachineBasicBlock::LQR_Dead) {
+ // VFP pops don't allow holes in the register list, so any skip is fatal
+ // for our transformation. GPR pops do, so we should just keep looking.
+ if (IsVFPPushPop)
+ return false;
+ else
+ continue;
+ }
+
+ // Mark the unimportant registers as <def,dead> in the POP.
+ RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
+ true));
+ --RegsNeeded;
+ }
+
+ if (RegsNeeded > 0)
+ return false;
+
+ // Finally we know we can profitably perform the optimisation so go
+ // ahead: strip all existing registers off and add them back again
+ // in the right order.
+ for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
+ MI->RemoveOperand(i);
+
+ // Add the complete list back in.
+ MachineInstrBuilder MIB(MF, &*MI);
+ for (int i = RegList.size() - 1; i >= 0; --i)
+ MIB.addOperand(RegList[i]);
+
+ return true;
+}
+
+bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrMode2.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrMode2;
+
+ if (Opcode == ARM::ADDri) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::MOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(FrameRegIdx+1);
+ Offset = 0;
+ return true;
+ } else if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::SUBri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getSOImmVal(Offset) != -1) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, pull as much of the immedidate into this ADDri/SUBri
+ // as possible.
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ // Get the properly encoded SOImmVal field.
+ assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ } else {
+ unsigned ImmIdx = 0;
+ int InstrOffs = 0;
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ switch (AddrMode) {
+ case ARMII::AddrMode_i12: {
+ ImmIdx = FrameRegIdx + 1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ break;
+ }
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Can't fold any offset even if it's zero.
+ return false;
+ case ARMII::AddrMode5: {
+ ImmIdx = FrameRegIdx+1;
+ InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+
+ // Attempt to fold address comp. if opcode has offset bits
+ if (NumBits > 0) {
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // FIXME: When addrmode2 goes away, this will simplify (like the
+ // T2 version), as the LDR.i12 versions don't need the encoding
+ // tricks for the offset value.
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
+
+/// analyzeCompare - For a comparison instruction, return the source registers
+/// in SrcReg and SrcReg2 if having two register operands, and the value it
+/// compares against in CmpValue. Return true if the comparison instruction
+/// can be analyzed.
+bool ARMBaseInstrInfo::
+analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::CMPri:
+ case ARM::t2CMPri:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ case ARM::CMPrr:
+ case ARM::t2CMPrr:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = MI->getOperand(1).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case ARM::TSTri:
+ case ARM::t2TSTri:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
+ CmpMask = MI->getOperand(1).getImm();
+ CmpValue = 0;
+ return true;
+ }
+
+ return false;
+}
+
+/// isSuitableForMask - Identify a suitable 'and' instruction that
+/// operates on the given source register and applies the same mask
+/// as a 'tst' instruction. Provide a limited look-through for copies.
+/// When successful, MI will hold the found instruction.
+static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
+ int CmpMask, bool CommonUse) {
+ switch (MI->getOpcode()) {
+ case ARM::ANDri:
+ case ARM::t2ANDri:
+ if (CmpMask != MI->getOperand(2).getImm())
+ return false;
+ if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+/// getSwappedCondition - assume the flags are set by MI(a,b), return
+/// the condition code if we modify the instructions such that flags are
+/// set by MI(b,a).
+inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: return ARMCC::AL;
+ case ARMCC::EQ: return ARMCC::EQ;
+ case ARMCC::NE: return ARMCC::NE;
+ case ARMCC::HS: return ARMCC::LS;
+ case ARMCC::LO: return ARMCC::HI;
+ case ARMCC::HI: return ARMCC::LO;
+ case ARMCC::LS: return ARMCC::HS;
+ case ARMCC::GE: return ARMCC::LE;
+ case ARMCC::LT: return ARMCC::GT;
+ case ARMCC::GT: return ARMCC::LT;
+ case ARMCC::LE: return ARMCC::GE;
+ }
+}
+
+/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// purpose is to update flags, can be made redundant.
+/// CMPrr can be made redundant by SUBrr if the operands are the same.
+/// CMPri can be made redundant by SUBri if the operands are the same.
+/// This function can be extended later on.
+inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
+ unsigned SrcReg2, int ImmValue,
+ MachineInstr *OI) {
+ if ((CmpI->getOpcode() == ARM::CMPrr ||
+ CmpI->getOpcode() == ARM::t2CMPrr) &&
+ (OI->getOpcode() == ARM::SUBrr ||
+ OI->getOpcode() == ARM::t2SUBrr) &&
+ ((OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) ||
+ (OI->getOperand(1).getReg() == SrcReg2 &&
+ OI->getOperand(2).getReg() == SrcReg)))
+ return true;
+
+ if ((CmpI->getOpcode() == ARM::CMPri ||
+ CmpI->getOpcode() == ARM::t2CMPri) &&
+ (OI->getOpcode() == ARM::SUBri ||
+ OI->getOpcode() == ARM::t2SUBri) &&
+ OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getImm() == ImmValue)
+ return true;
+ return false;
+}
+
+/// optimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register;
+/// Remove a redundant Compare instruction if an earlier instruction can set the
+/// flags in the same way as Compare.
+/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
+/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
+/// condition code of instructions which use the flags.
+bool ARMBaseInstrInfo::
+optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
+
+ // Masked compares sometimes use the same register as the corresponding 'and'.
+ if (CmpMask != ~0) {
+ if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
+ MI = nullptr;
+ for (MachineRegisterInfo::use_instr_iterator
+ UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
+ UI != UE; ++UI) {
+ if (UI->getParent() != CmpInstr->getParent()) continue;
+ MachineInstr *PotentialAND = &*UI;
+ if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
+ isPredicated(PotentialAND))
+ continue;
+ MI = PotentialAND;
+ break;
+ }
+ if (!MI) return false;
+ }
+ }
+
+ // Get ready to iterate backward from CmpInstr.
+ MachineBasicBlock::iterator I = CmpInstr, E = MI,
+ B = CmpInstr->getParent()->begin();
+
+ // Early exit if CmpInstr is at the beginning of the BB.
+ if (I == B) return false;
+
+ // There are two possible candidates which can be changed to set CPSR:
+ // One is MI, the other is a SUB instruction.
+ // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+ // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
+ MachineInstr *Sub = nullptr;
+ if (SrcReg2 != 0)
+ // MI is not a candidate for CMPrr.
+ MI = nullptr;
+ else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
+ // Conservatively refuse to convert an instruction which isn't in the same
+ // BB as the comparison.
+ // For CMPri w/ CmpValue != 0, a Sub may still be a candidate.
+ // Thus we cannot return here.
+ if (CmpInstr->getOpcode() == ARM::CMPri ||
+ CmpInstr->getOpcode() == ARM::t2CMPri)
+ MI = nullptr;
+ else
+ return false;
+ }
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change. At the same time, search for Sub.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ --I;
+ for (; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
+ Instr.readsRegister(ARM::CPSR, TRI))
+ // This instruction modifies or uses CPSR after the one we want to
+ // change. We can't do this transformation.
+ return false;
+
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
+ Sub = &*I;
+ break;
+ }
+
+ if (I == B)
+ // The 'and' is below the comparison instruction.
+ return false;
+ }
+
+ // Return false if no candidates exist.
+ if (!MI && !Sub)
+ return false;
+
+ // The single candidate is called MI.
+ if (!MI) MI = Sub;
+
+ // We can't use a predicated instruction - it doesn't always write the flags.
+ if (isPredicated(MI))
+ return false;
+
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::RSBrr:
+ case ARM::RSBri:
+ case ARM::RSCrr:
+ case ARM::RSCri:
+ case ARM::ADDrr:
+ case ARM::ADDri:
+ case ARM::ADCrr:
+ case ARM::ADCri:
+ case ARM::SUBrr:
+ case ARM::SUBri:
+ case ARM::SBCrr:
+ case ARM::SBCri:
+ case ARM::t2RSBri:
+ case ARM::t2ADDrr:
+ case ARM::t2ADDri:
+ case ARM::t2ADCrr:
+ case ARM::t2ADCri:
+ case ARM::t2SUBrr:
+ case ARM::t2SUBri:
+ case ARM::t2SBCrr:
+ case ARM::t2SBCri:
+ case ARM::ANDrr:
+ case ARM::ANDri:
+ case ARM::t2ANDrr:
+ case ARM::t2ANDri:
+ case ARM::ORRrr:
+ case ARM::ORRri:
+ case ARM::t2ORRrr:
+ case ARM::t2ORRri:
+ case ARM::EORrr:
+ case ARM::EORri:
+ case ARM::t2EORrr:
+ case ARM::t2EORri: {
+ // Scan forward for the use of CPSR
+ // When checking against MI: if it's a conditional code that requires
+ // checking of the V bit or C bit, then this is not safe to do.
+ // It is safe to remove CmpInstr if CPSR is redefined or killed.
+ // If we are done with the basic block, we need to check whether CPSR is
+ // live-out.
+ SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
+ OperandsToUpdate;
+ bool isSafe = false;
+ I = CmpInstr;
+ E = CmpInstr->getParent()->end();
+ while (!isSafe && ++I != E) {
+ const MachineInstr &Instr = *I;
+ for (unsigned IO = 0, EO = Instr.getNumOperands();
+ !isSafe && IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
+ isSafe = true;
+ break;
+ }
+ if (!MO.isReg() || MO.getReg() != ARM::CPSR)
+ continue;
+ if (MO.isDef()) {
+ isSafe = true;
+ break;
+ }
+ // Condition code is after the operand before CPSR except for VSELs.
+ ARMCC::CondCodes CC;
+ bool IsInstrVSel = true;
+ switch (Instr.getOpcode()) {
+ default:
+ IsInstrVSel = false;
+ CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
+ break;
+ case ARM::VSELEQD:
+ case ARM::VSELEQS:
+ CC = ARMCC::EQ;
+ break;
+ case ARM::VSELGTD:
+ case ARM::VSELGTS:
+ CC = ARMCC::GT;
+ break;
+ case ARM::VSELGED:
+ case ARM::VSELGES:
+ CC = ARMCC::GE;
+ break;
+ case ARM::VSELVSS:
+ case ARM::VSELVSD:
+ CC = ARMCC::VS;
+ break;
+ }
+
+ if (Sub) {
+ ARMCC::CondCodes NewCC = getSwappedCondition(CC);
+ if (NewCC == ARMCC::AL)
+ return false;
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
+ // on CMP needs to be updated to be based on SUB.
+ // Push the condition code operands to OperandsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // operands will be modified.
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg) {
+ // VSel doesn't support condition code update.
+ if (IsInstrVSel)
+ return false;
+ OperandsToUpdate.push_back(
+ std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
+ }
+ } else {
+ // No Sub, so this is x = <op> y, z; cmp x, 0.
+ switch (CC) {
+ case ARMCC::EQ: // Z
+ case ARMCC::NE: // Z
+ case ARMCC::MI: // N
+ case ARMCC::PL: // N
+ case ARMCC::AL: // none
+ // CPSR can be used multiple times, we should continue.
+ break;
+ case ARMCC::HS: // C
+ case ARMCC::LO: // C
+ case ARMCC::VS: // V
+ case ARMCC::VC: // V
+ case ARMCC::HI: // C Z
+ case ARMCC::LS: // C Z
+ case ARMCC::GE: // N V
+ case ARMCC::LT: // N V
+ case ARMCC::GT: // Z N V
+ case ARMCC::LE: // Z N V
+ // The instruction uses the V bit or C bit which is not safe.
+ return false;
+ }
+ }
+ }
+ }
+
+ // If CPSR is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (!isSafe) {
+ MachineBasicBlock *MBB = CmpInstr->getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(ARM::CPSR))
+ return false;
+ }
+
+ // Toggle the optional operand to CPSR.
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
+ assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
+ CmpInstr->eraseFromParent();
+
+ // Modify the condition code of operands in OperandsToUpdate.
+ // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+ // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
+ OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
+ MachineInstr *DefMI, unsigned Reg,
+ MachineRegisterInfo *MRI) const {
+ // Fold large immediates into add, sub, or, xor.
+ unsigned DefOpc = DefMI->getOpcode();
+ if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
+ return false;
+ if (!DefMI->getOperand(1).isImm())
+ // Could be t2MOVi32imm <ga:xx>
+ return false;
+
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ const MCInstrDesc &DefMCID = DefMI->getDesc();
+ if (DefMCID.hasOptionalDef()) {
+ unsigned NumOps = DefMCID.getNumOperands();
+ const MachineOperand &MO = DefMI->getOperand(NumOps-1);
+ if (MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If DefMI defines CPSR and it is not dead, it's obviously not safe
+ // to delete DefMI.
+ return false;
+ }
+
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ if (UseMCID.hasOptionalDef()) {
+ unsigned NumOps = UseMCID.getNumOperands();
+ if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
+ // If the instruction sets the flag, do not attempt this optimization
+ // since it may change the semantics of the code.
+ return false;
+ }
+
+ unsigned UseOpc = UseMI->getOpcode();
+ unsigned NewUseOpc = 0;
+ uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
+ uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
+ bool Commute = false;
+ switch (UseOpc) {
+ default: return false;
+ case ARM::SUBrr:
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr:
+ case ARM::t2SUBrr:
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ Commute = UseMI->getOperand(2).getReg() != Reg;
+ switch (UseOpc) {
+ default: break;
+ case ARM::SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::SUBri;
+ // Fallthrough
+ }
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr: {
+ if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
+ case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
+ case ARM::EORrr: NewUseOpc = ARM::EORri; break;
+ }
+ break;
+ }
+ case ARM::t2SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::t2SUBri;
+ // Fallthrough
+ }
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
+ case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
+ case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ unsigned OpIdx = Commute ? 2 : 1;
+ unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
+ bool isKill = UseMI->getOperand(OpIdx).isKill();
+ unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+ AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
+ UseMI, UseMI->getDebugLoc(),
+ get(NewUseOpc), NewReg)
+ .addReg(Reg1, getKillRegState(isKill))
+ .addImm(SOImmValV1)));
+ UseMI->setDesc(get(NewUseOpc));
+ UseMI->getOperand(1).setReg(NewReg);
+ UseMI->getOperand(1).setIsKill();
+ UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
+ DefMI->eraseFromParent();
+ return true;
+}
+
+static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: {
+ const MCInstrDesc &Desc = MI->getDesc();
+ int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
+ assert(UOps >= 0 && "bad # UOps");
+ return UOps;
+ }
+
+ case ARM::LDRrs:
+ case ARM::LDRBrs:
+ case ARM::STRrs:
+ case ARM::STRBrs: {
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 1;
+ return 2;
+ }
+
+ case ARM::LDRH:
+ case ARM::STRH: {
+ if (!MI->getOperand(2).getReg())
+ return 1;
+
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 1;
+ return 2;
+ }
+
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
+
+ case ARM::LDRSB_POST:
+ case ARM::LDRSH_POST: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ return (Rt == Rm) ? 4 : 3;
+ }
+
+ case ARM::LDR_PRE_REG:
+ case ARM::LDRB_PRE_REG: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rt == Rm)
+ return 3;
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 2;
+ return 3;
+ }
+
+ case ARM::STR_PRE_REG:
+ case ARM::STRB_PRE_REG: {
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 2;
+ return 3;
+ }
+
+ case ARM::LDRH_PRE:
+ case ARM::STRH_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (!Rm)
+ return 2;
+ if (Rt == Rm)
+ return 3;
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
+ ? 3 : 2;
+ }
+
+ case ARM::LDR_POST_REG:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRH_POST: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ return (Rt == Rm) ? 3 : 2;
+ }
+
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDRB_POST_IMM:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRH_POST:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STR_PRE_IMM:
+ return 2;
+
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSH_PRE: {
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm == 0)
+ return 3;
+ unsigned Rt = MI->getOperand(0).getReg();
+ if (Rt == Rm)
+ return 4;
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 3;
+ return 4;
+ }
+
+ case ARM::LDRD: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(2).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+ return (Rt == Rn) ? 3 : 2;
+ }
+
+ case ARM::STRD: {
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+ return 2;
+ }
+
+ case ARM::LDRD_POST:
+ case ARM::t2LDRD_POST:
+ return 3;
+
+ case ARM::STRD_POST:
+ case ARM::t2STRD_POST:
+ return 4;
+
+ case ARM::LDRD_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(3).getReg();
+ unsigned Rm = MI->getOperand(4).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+ return (Rt == Rn) ? 4 : 3;
+ }
+
+ case ARM::t2LDRD_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(3).getReg();
+ return (Rt == Rn) ? 4 : 3;
+ }
+
+ case ARM::STRD_PRE: {
+ unsigned Rm = MI->getOperand(4).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+ return 3;
+ }
+
+ case ARM::t2STRD_PRE:
+ return 3;
+
+ case ARM::t2LDR_POST:
+ case ARM::t2LDRB_POST:
+ case ARM::t2LDRB_PRE:
+ case ARM::t2LDRSBi12:
+ case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBpci:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRH_POST:
+ case ARM::t2LDRH_PRE:
+ case ARM::t2LDRSBT:
+ case ARM::t2LDRSB_POST:
+ case ARM::t2LDRSB_PRE:
+ case ARM::t2LDRSH_POST:
+ case ARM::t2LDRSH_PRE:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHpci:
+ case ARM::t2LDRSHs:
+ return 2;
+
+ case ARM::t2LDRDi8: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(2).getReg();
+ return (Rt == Rn) ? 3 : 2;
+ }
+
+ case ARM::t2STRB_POST:
+ case ARM::t2STRB_PRE:
+ case ARM::t2STRBs:
+ case ARM::t2STRDi8:
+ case ARM::t2STRH_POST:
+ case ARM::t2STRH_PRE:
+ case ARM::t2STRHs:
+ case ARM::t2STR_POST:
+ case ARM::t2STR_PRE:
+ case ARM::t2STRs:
+ return 2;
+ }
+}
+
+// Return the number of 32-bit words loaded by LDM or stored by STM. If this
+// can't be easily determined return 0 (missing MachineMemOperand).
+//
+// FIXME: The current MachineInstr design does not support relying on machine
+// mem operands to determine the width of a memory access. Instead, we expect
+// the target to provide this information based on the instruction opcode and
+// operands. However, using MachineMemOperand is the best solution now for
+// two reasons:
+//
+// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
+// operands. This is much more dangerous than using the MachineMemOperand
+// sizes because CodeGen passes can insert/remove optional machine operands. In
+// fact, it's totally incorrect for preRA passes and appears to be wrong for
+// postRA passes as well.
+//
+// 2) getNumLDMAddresses is only used by the scheduling machine model and any
+// machine model that calls this should handle the unknown (zero size) case.
+//
+// Long term, we should require a target hook that verifies MachineMemOperand
+// sizes during MC lowering. That target hook should be local to MC lowering
+// because we can't ensure that it is aware of other MI forms. Doing this will
+// ensure that MachineMemOperands are correctly propagated through all passes.
+unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I) {
+ Size += (*I)->getSize();
+ }
+ return Size / 4;
+}
+
+unsigned
+ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned Class = Desc.getSchedClass();
+ int ItinUOps = ItinData->getNumMicroOps(Class);
+ if (ItinUOps >= 0) {
+ if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
+ return getNumMicroOpsSwiftLdSt(ItinData, MI);
+
+ return ItinUOps;
+ }
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unexpected multi-uops instruction!");
+ case ARM::VLDMQIA:
+ case ARM::VSTMQIA:
+ return 2;
+
+ // The number of uOps for load / store multiple are determined by the number
+ // registers.
+ //
+ // On Cortex-A8, each pair of register loads / stores can be scheduled on the
+ // same cycle. The scheduling for the first load / store must be done
+ // separately by assuming the address is not 64-bit aligned.
+ //
+ // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
+ // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
+ // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
+ return (NumRegs / 2) + (NumRegs % 2) + 1;
+ }
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
+ if (Subtarget.isSwift()) {
+ int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
+ switch (Opc) {
+ default: break;
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ ++UOps; // One for base register writeback.
+ break;
+ case ARM::LDMIA_RET:
+ case ARM::tPOP_RET:
+ case ARM::t2LDMIA_RET:
+ UOps += 2; // One for base reg wb, one for write to pc.
+ break;
+ }
+ return UOps;
+ } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
+ if (NumRegs < 4)
+ return 2;
+ // 4 registers would be issued: 2, 2.
+ // 5 registers would be issued: 2, 2, 1.
+ int A8UOps = (NumRegs / 2);
+ if (NumRegs % 2)
+ ++A8UOps;
+ return A8UOps;
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
+ int A9UOps = (NumRegs / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((NumRegs % 2) ||
+ !MI->hasOneMemOperand() ||
+ (*MI->memoperands_begin())->getAlignment() < 8)
+ ++A9UOps;
+ return A9UOps;
+ } else {
+ // Assume the worst.
+ return NumRegs;
+ }
+ }
+ }
+}
+
+int
+ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
+ // (regno / 2) + (regno % 2) + 1
+ DefCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++DefCycle;
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
+ DefCycle = RegNo;
+ bool isSLoad = false;
+
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ isSLoad = true;
+ break;
+ }
+
+ // If there are odd number of 'S' registers or if it's not 64-bit aligned,
+ // then it takes an extra cycle.
+ if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
+ ++DefCycle;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
+ // 4 registers would be issued: 1, 2, 1.
+ // 5 registers would be issued: 1, 2, 2.
+ DefCycle = RegNo / 2;
+ if (DefCycle < 1)
+ DefCycle = 1;
+ // Result latency is issue cycle + 2: E2.
+ DefCycle += 2;
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
+ DefCycle = (RegNo / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || DefAlign < 8)
+ ++DefCycle;
+ // Result latency is AGU cycles + 2.
+ DefCycle += 2;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
+ // (regno / 2) + (regno % 2) + 1
+ UseCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++UseCycle;
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
+ UseCycle = RegNo;
+ bool isSStore = false;
+
+ switch (UseMCID.getOpcode()) {
+ default: break;
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ isSStore = true;
+ break;
+ }
+
+ // If there are odd number of 'S' registers or if it's not 64-bit aligned,
+ // then it takes an extra cycle.
+ if ((isSStore && (RegNo % 2)) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = RegNo + 2;
+ }
+
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
+ UseCycle = RegNo / 2;
+ if (UseCycle < 2)
+ UseCycle = 2;
+ // Read in E3.
+ UseCycle += 2;
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
+ UseCycle = (RegNo / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = 1;
+ }
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefIdx, unsigned DefAlign,
+ const MCInstrDesc &UseMCID,
+ unsigned UseIdx, unsigned UseAlign) const {
+ unsigned DefClass = DefMCID.getSchedClass();
+ unsigned UseClass = UseMCID.getSchedClass();
+
+ if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+
+ // This may be a def / use of a variable_ops instruction, the operand
+ // latency might be determinable dynamically. Let the target try to
+ // figure it out.
+ int DefCycle = -1;
+ bool LdmBypass = false;
+ switch (DefMCID.getOpcode()) {
+ default:
+ DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ break;
+
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
+ break;
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ LdmBypass = 1;
+ DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
+ break;
+ }
+
+ if (DefCycle == -1)
+ // We can't seem to determine the result latency of the def, assume it's 2.
+ DefCycle = 2;
+
+ int UseCycle = -1;
+ switch (UseMCID.getOpcode()) {
+ default:
+ UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
+ break;
+
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
+ break;
+
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
+ break;
+ }
+
+ if (UseCycle == -1)
+ // Assume it's read in the first stage.
+ UseCycle = 1;
+
+ UseCycle = DefCycle - UseCycle + 1;
+ if (UseCycle > 0) {
+ if (LdmBypass) {
+ // It's a variable_ops instruction so we can't use DefIdx here. Just use
+ // first def operand.
+ if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
+ UseClass, UseIdx))
+ --UseCycle;
+ } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
+ UseClass, UseIdx)) {
+ --UseCycle;
+ }
+ }
+
+ return UseCycle;
+}
+
+static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &DefIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_iterator I = MI; ++I;
+ MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
+ assert(II->isInsideBundle() && "Empty bundle?");
+
+ int Idx = -1;
+ while (II->isInsideBundle()) {
+ Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (Idx != -1)
+ break;
+ --II;
+ ++Dist;
+ }
+
+ assert(Idx != -1 && "Cannot find bundled definition!");
+ DefIdx = Idx;
+ return &*II;
+}
+
+static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &UseIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_instr_iterator II = ++MI->getIterator();
+ assert(II->isInsideBundle() && "Empty bundle?");
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+ // FIXME: This doesn't properly handle multiple uses.
+ int Idx = -1;
+ while (II != E && II->isInsideBundle()) {
+ Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
+ if (Idx != -1)
+ break;
+ if (II->getOpcode() != ARM::t2IT)
+ ++Dist;
+ ++II;
+ }
+
+ if (Idx == -1) {
+ Dist = 0;
+ return nullptr;
+ }
+
+ UseIdx = Idx;
+ return &*II;
+}
+
+/// Return the number of cycles to add to (or subtract from) the static
+/// itinerary based on the def opcode and alignment. The caller will ensure that
+/// adjusted latency is at least one cycle.
+static int adjustDefLatency(const ARMSubtarget &Subtarget,
+ const MachineInstr *DefMI,
+ const MCInstrDesc *DefMCID, unsigned DefAlign) {
+ int Adjust = 0;
+ if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefMCID->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Adjust;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Adjust;
+ break;
+ }
+ }
+ } else if (Subtarget.isSwift()) {
+ // FIXME: Properly handle all of the latency adjustments for address
+ // writeback.
+ switch (DefMCID->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ Adjust -= 2;
+ else if (!isSub &&
+ ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+ --Adjust;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
+ Adjust -= 2;
+ break;
+ }
+ }
+ }
+
+ if (DefAlign < 8 && Subtarget.isLikeA9()) {
+ switch (DefMCID->getOpcode()) {
+ default: break;
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD1d64T:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d64Twb_register:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD1d64Q:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d64Qwb_register:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8wb_fixed:
+ case ARM::VLD2DUPd16wb_fixed:
+ case ARM::VLD2DUPd32wb_fixed:
+ case ARM::VLD2DUPd8wb_register:
+ case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_register:
+ case ARM::VLD4DUPd8:
+ case ARM::VLD4DUPd16:
+ case ARM::VLD4DUPd32:
+ case ARM::VLD4DUPd8_UPD:
+ case ARM::VLD4DUPd16_UPD:
+ case ARM::VLD4DUPd32_UPD:
+ case ARM::VLD1LNd8:
+ case ARM::VLD1LNd16:
+ case ARM::VLD1LNd32:
+ case ARM::VLD1LNd8_UPD:
+ case ARM::VLD1LNd16_UPD:
+ case ARM::VLD1LNd32_UPD:
+ case ARM::VLD2LNd8:
+ case ARM::VLD2LNd16:
+ case ARM::VLD2LNd32:
+ case ARM::VLD2LNq16:
+ case ARM::VLD2LNq32:
+ case ARM::VLD2LNd8_UPD:
+ case ARM::VLD2LNd16_UPD:
+ case ARM::VLD2LNd32_UPD:
+ case ARM::VLD2LNq16_UPD:
+ case ARM::VLD2LNq32_UPD:
+ case ARM::VLD4LNd8:
+ case ARM::VLD4LNd16:
+ case ARM::VLD4LNd32:
+ case ARM::VLD4LNq16:
+ case ARM::VLD4LNq32:
+ case ARM::VLD4LNd8_UPD:
+ case ARM::VLD4LNd16_UPD:
+ case ARM::VLD4LNd32_UPD:
+ case ARM::VLD4LNq16_UPD:
+ case ARM::VLD4LNq32_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increases by one.
+ ++Adjust;
+ break;
+ }
+ }
+ return Adjust;
+}
+
+
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const {
+ // No operand latency. The caller may fall back to getInstrLatency.
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ unsigned Reg = DefMO.getReg();
+ const MCInstrDesc *DefMCID = &DefMI->getDesc();
+ const MCInstrDesc *UseMCID = &UseMI->getDesc();
+
+ unsigned DefAdj = 0;
+ if (DefMI->isBundle()) {
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+ DefMCID = &DefMI->getDesc();
+ }
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef()) {
+ return 1;
+ }
+
+ unsigned UseAdj = 0;
+ if (UseMI->isBundle()) {
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (!NewUseMI)
+ return -1;
+
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
+
+ if (Reg == ARM::CPSR) {
+ if (DefMI->getOpcode() == ARM::FMSTAT) {
+ // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
+ return Subtarget.isLikeA9() ? 1 : 20;
+ }
+
+ // CPSR set and branch can be paired in the same cycle.
+ if (UseMI->isBranch())
+ return 0;
+
+ // Otherwise it takes the instruction latency (generally one).
+ unsigned Latency = getInstrLatency(ItinData, DefMI);
+
+ // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+ // its uses. Instructions which are otherwise scheduled between them may
+ // incur a code size penalty (not able to use the CPSR setting 16-bit
+ // instructions).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ // FIXME: Use Function::optForSize().
+ if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
+ --Latency;
+ }
+ return Latency;
+ }
+
+ if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
+ return -1;
+
+ unsigned DefAlign = DefMI->hasOneMemOperand()
+ ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+ unsigned UseAlign = UseMI->hasOneMemOperand()
+ ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+
+ // Get the itinerary's latency if possible, and handle variable_ops.
+ int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+ *UseMCID, UseIdx, UseAlign);
+ // Unable to find operand latency. The caller may resort to getInstrLatency.
+ if (Latency < 0)
+ return Latency;
+
+ // Adjust for IT block position.
+ int Adj = DefAdj + UseAdj;
+
+ // Adjust for dynamic def-side opcode variants not captured by the itinerary.
+ Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
+ if (Adj >= 0 || (int)Latency > -Adj) {
+ return Latency + Adj;
+ }
+ // Return the itinerary latency, which may be zero but not less than zero.
+ return Latency;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!DefNode->isMachineOpcode())
+ return 1;
+
+ const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
+
+ if (isZeroCost(DefMCID.Opcode))
+ return 0;
+
+ if (!ItinData || ItinData->isEmpty())
+ return DefMCID.mayLoad() ? 3 : 1;
+
+ if (!UseNode->isMachineOpcode()) {
+ int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
+ if (Subtarget.isLikeA9() || Subtarget.isSwift())
+ return Latency <= 2 ? 1 : Latency - 1;
+ else
+ return Latency <= 3 ? 1 : Latency - 2;
+ }
+
+ const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
+ const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
+ unsigned DefAlign = !DefMN->memoperands_empty()
+ ? (*DefMN->memoperands_begin())->getAlignment() : 0;
+ const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
+ unsigned UseAlign = !UseMN->memoperands_empty()
+ ? (*UseMN->memoperands_begin())->getAlignment() : 0;
+ int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+ UseMCID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
+ Subtarget.isCortexA7())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
+ // FIXME: Properly handle all of the latency adjustments for address
+ // writeback.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ Latency -= 2;
+ else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl 0-3 only.
+ Latency -= 2;
+ break;
+ }
+ }
+ }
+
+ if (DefAlign < 8 && Subtarget.isLikeA9())
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64TPseudoWB_fixed:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD1d64QPseudoWB_fixed:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8wb_fixed:
+ case ARM::VLD2DUPd16wb_fixed:
+ case ARM::VLD2DUPd32wb_fixed:
+ case ARM::VLD2DUPd8wb_register:
+ case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_register:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increases by one.
+ ++Latency;
+ break;
+ }
+
+ return Latency;
+}
+
+unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 0;
+
+ if (MI->isBundle())
+ return 0;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
+ // When predicated, CPSR is an additional source operand for CPSR updating
+ // instructions, this apparently increases their latencies.
+ return 1;
+ }
+ return 0;
+}
+
+unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 1;
+
+ // An instruction scheduler typically runs on unbundled instructions, however
+ // other passes may query the latency of a bundled instruction.
+ if (MI->isBundle()) {
+ unsigned Latency = 0;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ if (I->getOpcode() != ARM::t2IT)
+ Latency += getInstrLatency(ItinData, &*I, PredCost);
+ }
+ return Latency;
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
+ // When predicated, CPSR is an additional source operand for CPSR updating
+ // instructions, this apparently increases their latencies.
+ *PredCost = 1;
+ }
+ // Be sure to call getStageLatency for an empty itinerary in case it has a
+ // valid MinLatency property.
+ if (!ItinData)
+ return MI->mayLoad() ? 3 : 1;
+
+ unsigned Class = MCID.getSchedClass();
+
+ // For instructions with variable uops, use uops as latency.
+ if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
+ return getNumMicroOps(ItinData, MI);
+
+ // For the common case, fall back on the itinerary's latency.
+ unsigned Latency = ItinData->getStageLatency(Class);
+
+ // Adjust for dynamic def-side opcode variants not captured by the itinerary.
+ unsigned DefAlign = MI->hasOneMemOperand()
+ ? (*MI->memoperands_begin())->getAlignment() : 0;
+ int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
+ if (Adj >= 0 || (int)Latency > -Adj) {
+ return Latency + Adj;
+ }
+ return Latency;
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const {
+ if (!Node->isMachineOpcode())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Opcode = Node->getMachineOpcode();
+ switch (Opcode) {
+ default:
+ return ItinData->getStageLatency(get(Opcode).getSchedClass());
+ case ARM::VLDMQIA:
+ case ARM::VSTMQIA:
+ return 2;
+ }
+}
+
+bool ARMBaseInstrInfo::
+hasHighOperandLatency(const TargetSchedModel &SchedModel,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (Subtarget.isCortexA8() &&
+ (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
+ // CortexA8 VFP instructions are not pipelined.
+ return true;
+
+ // Hoist VFP / NEON instructions with 4 or higher latency.
+ unsigned Latency
+ = SchedModel.computeOperandLatency(DefMI, DefIdx, UseMI, UseIdx);
+ if (Latency <= 3)
+ return false;
+ return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
+ UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
+}
+
+bool ARMBaseInstrInfo::
+hasLowDefLatency(const TargetSchedModel &SchedModel,
+ const MachineInstr *DefMI, unsigned DefIdx) const {
+ const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (DDomain == ARMII::DomainGeneral) {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 2);
+ }
+ return false;
+}
+
+bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const {
+ if (convertAddSubFlagsOpcode(MI->getOpcode())) {
+ ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
+ return false;
+ }
+ return true;
+}
+
+// LoadStackGuard has so far only been implemented for MachO. Different code
+// sequence is needed for other targets.
+void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
+ unsigned LoadImmOpc,
+ unsigned LoadOpc,
+ Reloc::Model RM) const {
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Reg = MI->getOperand(0).getReg();
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+ MachineInstrBuilder MIB;
+
+ BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
+ .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
+
+ if (Subtarget.GVIsIndirectSymbol(GV, RM)) {
+ MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
+ MIB.addReg(Reg, RegState::Kill).addImm(0);
+ unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
+ MIB.addMemOperand(MMO);
+ AddDefaultPred(MIB);
+ }
+
+ MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
+ MIB.addReg(Reg, RegState::Kill).addImm(0);
+ MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ AddDefaultPred(MIB);
+}
+
+bool
+ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc,
+ bool &NegAcc, bool &HasLane) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
+ if (I == MLxEntryMap.end())
+ return false;
+
+ const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
+ MulOpc = Entry.MulOpc;
+ AddSubOpc = Entry.AddSubOpc;
+ NegAcc = Entry.NegAcc;
+ HasLane = Entry.HasLane;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Execution domains.
+//===----------------------------------------------------------------------===//
+//
+// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
+// and some can go down both. The vmov instructions go down the VFP pipeline,
+// but they can be changed to vorr equivalents that are executed by the NEON
+// pipeline.
+//
+// We use the following execution domain numbering:
+//
+enum ARMExeDomain {
+ ExeGeneric = 0,
+ ExeVFP = 1,
+ ExeNEON = 2
+};
+//
+// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
+//
+std::pair<uint16_t, uint16_t>
+ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
+ // If we don't have access to NEON instructions then we won't be able
+ // to swizzle anything to the NEON domain. Check to make sure.
+ if (Subtarget.hasNEON()) {
+ // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
+ // if they are not predicated.
+ if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+ return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+
+ // CortexA9 is particularly picky about mixing the two and wants these
+ // converted.
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+ (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR ||
+ MI->getOpcode() == ARM::VMOVS))
+ return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+ }
+ // No other instructions can be swizzled, so just determine their domain.
+ unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
+
+ if (Domain & ARMII::DomainNEON)
+ return std::make_pair(ExeNEON, 0);
+
+ // Certain instructions can go either way on Cortex-A8.
+ // Treat them as NEON instructions.
+ if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
+ return std::make_pair(ExeNEON, 0);
+
+ if (Domain & ARMII::DomainVFP)
+ return std::make_pair(ExeVFP, 0);
+
+ return std::make_pair(ExeGeneric, 0);
+}
+
+static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
+ unsigned SReg, unsigned &Lane) {
+ unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
+ Lane = 0;
+
+ if (DReg != ARM::NoRegister)
+ return DReg;
+
+ Lane = 1;
+ DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
+
+ assert(DReg && "S-register with no D super-register?");
+ return DReg;
+}
+
+/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
+/// set ImplicitSReg to a register number that must be marked as implicit-use or
+/// zero if no register needs to be defined as implicit-use.
+///
+/// If the function cannot determine if an SPR should be marked implicit use or
+/// not, it returns false.
+///
+/// This function handles cases where an instruction is being modified from taking
+/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
+/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
+/// lane of the DPR).
+///
+/// If the other SPR is defined, an implicit-use of it should be added. Else,
+/// (including the case where the DPR itself is defined), it should not.
+///
+static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
+ MachineInstr *MI,
+ unsigned DReg, unsigned Lane,
+ unsigned &ImplicitSReg) {
+ // If the DPR is defined or used already, the other SPR lane will be chained
+ // correctly, so there is nothing to be done.
+ if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
+ ImplicitSReg = 0;
+ return true;
+ }
+
+ // Otherwise we need to go searching to see if the SPR is set explicitly.
+ ImplicitSReg = TRI->getSubReg(DReg,
+ (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
+ MachineBasicBlock::LivenessQueryResult LQR =
+ MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
+
+ if (LQR == MachineBasicBlock::LQR_Live)
+ return true;
+ else if (LQR == MachineBasicBlock::LQR_Unknown)
+ return false;
+
+ // If the register is known not to be live, there is no need to add an
+ // implicit-use.
+ ImplicitSReg = 0;
+ return true;
+}
+
+void
+ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
+ unsigned DstReg, SrcReg, DReg;
+ unsigned Lane;
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("cannot handle opcode!");
+ break;
+ case ARM::VMOVD:
+ if (Domain != ExeNEON)
+ break;
+
+ // Zap the predicate operands.
+ assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+
+ // Make sure we've got NEON instructions.
+ assert(Subtarget.hasNEON() && "VORRd requires NEON");
+
+ // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
+ MI->setDesc(get(ARM::VORRd));
+ AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
+ .addReg(SrcReg)
+ .addReg(SrcReg));
+ break;
+ case ARM::VMOVRS:
+ if (Domain != ExeNEON)
+ break;
+ assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
+
+ // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
+
+ // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
+ // Note that DSrc has been widened and the other lane may be undef, which
+ // contaminates the entire register.
+ MI->setDesc(get(ARM::VGETLNi32));
+ AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
+ .addReg(DReg, RegState::Undef)
+ .addImm(Lane));
+
+ // The old source should be an implicit use, otherwise we might think it
+ // was dead before here.
+ MIB.addReg(SrcReg, RegState::Implicit);
+ break;
+ case ARM::VMOVSR: {
+ if (Domain != ExeNEON)
+ break;
+ assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
+
+ // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
+
+ unsigned ImplicitSReg;
+ if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
+ break;
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
+ // Again DDst may be undefined at the beginning of this instruction.
+ MI->setDesc(get(ARM::VSETLNi32));
+ MIB.addReg(DReg, RegState::Define)
+ .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
+ .addReg(SrcReg)
+ .addImm(Lane);
+ AddDefaultPred(MIB);
+
+ // The narrower destination must be marked as set to keep previous chains
+ // in place.
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
+ break;
+ }
+ case ARM::VMOVS: {
+ if (Domain != ExeNEON)
+ break;
+
+ // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
+ DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
+ DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
+
+ unsigned ImplicitSReg;
+ if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
+ break;
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ if (DSrc == DDst) {
+ // Destination can be:
+ // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
+ MI->setDesc(get(ARM::VDUPLN32d));
+ MIB.addReg(DDst, RegState::Define)
+ .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
+ .addImm(SrcLane);
+ AddDefaultPred(MIB);
+
+ // Neither the source or the destination are naturally represented any
+ // more, so add them in manually.
+ MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
+ MIB.addReg(SrcReg, RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
+ break;
+ }
+
+ // In general there's no single instruction that can perform an S <-> S
+ // move in NEON space, but a pair of VEXT instructions *can* do the
+ // job. It turns out that the VEXTs needed will only use DSrc once, with
+ // the position based purely on the combination of lane-0 and lane-1
+ // involved. For example
+ // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
+ // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
+ // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
+ // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
+ //
+ // Pattern of the MachineInstrs is:
+ // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
+ MachineInstrBuilder NewMIB;
+ NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(ARM::VEXTd32), DDst);
+
+ // On the first instruction, both DSrc and DDst may be <undef> if present.
+ // Specifically when the original instruction didn't have them as an
+ // <imp-use>.
+ unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
+ bool CurUndef = !MI->readsRegister(CurReg, TRI);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
+ CurUndef = !MI->readsRegister(CurReg, TRI);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ NewMIB.addImm(1);
+ AddDefaultPred(NewMIB);
+
+ if (SrcLane == DstLane)
+ NewMIB.addReg(SrcReg, RegState::Implicit);
+
+ MI->setDesc(get(ARM::VEXTd32));
+ MIB.addReg(DDst, RegState::Define);
+
+ // On the second instruction, DDst has definitely been defined above, so
+ // it is not <undef>. DSrc, if present, can be <undef> as above.
+ CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
+ CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
+ CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ MIB.addImm(1);
+ AddDefaultPred(MIB);
+
+ if (SrcLane != DstLane)
+ MIB.addReg(SrcReg, RegState::Implicit);
+
+ // As before, the original destination is no longer represented, add it
+ // implicitly.
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
+ break;
+ }
+ }
+
+}
+
+//===----------------------------------------------------------------------===//
+// Partial register updates
+//===----------------------------------------------------------------------===//
+//
+// Swift renames NEON registers with 64-bit granularity. That means any
+// instruction writing an S-reg implicitly reads the containing D-reg. The
+// problem is mostly avoided by translating f32 operations to v2f32 operations
+// on D-registers, but f32 loads are still a problem.
+//
+// These instructions can load an f32 into a NEON register:
+//
+// VLDRS - Only writes S, partial D update.
+// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
+// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
+//
+// FCONSTD can be used as a dependency-breaking instruction.
+unsigned ARMBaseInstrInfo::
+getPartialRegUpdateClearance(const MachineInstr *MI,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (!SwiftPartialUpdateClearance ||
+ !(Subtarget.isSwift() || Subtarget.isCortexA15()))
+ return 0;
+
+ assert(TRI && "Need TRI instance");
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.readsReg())
+ return 0;
+ unsigned Reg = MO.getReg();
+ int UseOp = -1;
+
+ switch(MI->getOpcode()) {
+ // Normal instructions writing only an S-register.
+ case ARM::VLDRS:
+ case ARM::FCONSTS:
+ case ARM::VMOVSR:
+ case ARM::VMOVv8i8:
+ case ARM::VMOVv4i16:
+ case ARM::VMOVv2i32:
+ case ARM::VMOVv2f32:
+ case ARM::VMOVv1i64:
+ UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
+ break;
+
+ // Explicitly reads the dependency.
+ case ARM::VLD1LNd32:
+ UseOp = 3;
+ break;
+ default:
+ return 0;
+ }
+
+ // If this instruction actually reads a value from Reg, there is no unwanted
+ // dependency.
+ if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
+ return 0;
+
+ // We must be able to clobber the whole D-reg.
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Virtual register must be a foo:ssub_0<def,undef> operand.
+ if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
+ return 0;
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ // Physical register: MI must define the full D-reg.
+ unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ if (!DReg || !MI->definesRegister(DReg, TRI))
+ return 0;
+ }
+
+ // MI has an unwanted D-register dependency.
+ // Avoid defs in the previous N instructrions.
+ return SwiftPartialUpdateClearance;
+}
+
+// Break a partial register dependency after getPartialRegUpdateClearance
+// returned non-zero.
+void ARMBaseInstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
+ assert(TRI && "Need TRI instance");
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Can't break virtual register dependencies.");
+ unsigned DReg = Reg;
+
+ // If MI defines an S-reg, find the corresponding D super-register.
+ if (ARM::SPRRegClass.contains(Reg)) {
+ DReg = ARM::D0 + (Reg - ARM::S0) / 2;
+ assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
+ }
+
+ assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
+ assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
+
+ // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
+ // the full D-register by loading the same value to both lanes. The
+ // instruction is micro-coded with 2 uops, so don't do this until we can
+ // properly schedule micro-coded instructions. The dispatcher stalls cause
+ // too big regressions.
+
+ // Insert the dependency-breaking FCONSTD before MI.
+ // 96 is the encoding of 0.5, but the actual value doesn't matter here.
+ AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(ARM::FCONSTD), DReg).addImm(96));
+ MI->addRegisterKilled(DReg, TRI, true);
+}
+
+bool ARMBaseInstrInfo::hasNOP() const {
+ return Subtarget.getFeatureBits()[ARM::HasV6KOps];
+}
+
+bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
+ if (MI->getNumOperands() < 4)
+ return true;
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
+ // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
+ if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
+ ((ShImm == 1 || ShImm == 2) &&
+ ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
+ return true;
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
+ assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
+ assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
+
+ switch (MI.getOpcode()) {
+ case ARM::VMOVDRR:
+ // dX = VMOVDRR rY, rZ
+ // is the same as:
+ // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
+ // Populate the InputRegs accordingly.
+ // rY
+ const MachineOperand *MOReg = &MI.getOperand(1);
+ InputRegs.push_back(
+ RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0));
+ // rZ
+ MOReg = &MI.getOperand(2);
+ InputRegs.push_back(
+ RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1));
+ return true;
+ }
+ llvm_unreachable("Target dependent opcode missing");
+}
+
+bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPairAndIdx &InputReg) const {
+ assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
+ assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
+
+ switch (MI.getOpcode()) {
+ case ARM::VMOVRRD:
+ // rX, rY = VMOVRRD dZ
+ // is the same as:
+ // rX = EXTRACT_SUBREG dZ, ssub_0
+ // rY = EXTRACT_SUBREG dZ, ssub_1
+ const MachineOperand &MOReg = MI.getOperand(2);
+ InputReg.Reg = MOReg.getReg();
+ InputReg.SubReg = MOReg.getSubReg();
+ InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
+ return true;
+ }
+ llvm_unreachable("Target dependent opcode missing");
+}
+
+bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
+ const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
+ RegSubRegPairAndIdx &InsertedReg) const {
+ assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
+ assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
+
+ switch (MI.getOpcode()) {
+ case ARM::VSETLNi32:
+ // dX = VSETLNi32 dY, rZ, imm
+ const MachineOperand &MOBaseReg = MI.getOperand(1);
+ const MachineOperand &MOInsertedReg = MI.getOperand(2);
+ const MachineOperand &MOIndex = MI.getOperand(3);
+ BaseReg.Reg = MOBaseReg.getReg();
+ BaseReg.SubReg = MOBaseReg.getSubReg();
+
+ InsertedReg.Reg = MOInsertedReg.getReg();
+ InsertedReg.SubReg = MOInsertedReg.getSubReg();
+ InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
+ return true;
+ }
+ llvm_unreachable("Target dependent opcode missing");
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
new file mode 100644
index 0000000..d80c494
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -0,0 +1,509 @@
+//===-- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
+
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "ARMGenInstrInfo.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseRegisterInfo;
+
+class ARMBaseInstrInfo : public ARMGenInstrInfo {
+ const ARMSubtarget &Subtarget;
+
+protected:
+ // Can be only subclassed.
+ explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+
+ void expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
+ unsigned LoadImmOpc, unsigned LoadOpc,
+ Reloc::Model RM) const;
+
+ /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI
+ /// and \p DefIdx.
+ /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of
+ /// the list is modeled as <Reg:SubReg, SubIdx>.
+ /// E.g., REG_SEQUENCE vreg1:sub1, sub0, vreg2, sub1 would produce
+ /// two elements:
+ /// - vreg1:sub1, sub0
+ /// - vreg2<:0>, sub1
+ ///
+ /// \returns true if it is possible to build such an input sequence
+ /// with the pair \p MI, \p DefIdx. False otherwise.
+ ///
+ /// \pre MI.isRegSequenceLike().
+ bool getRegSequenceLikeInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const override;
+
+ /// Build the equivalent inputs of a EXTRACT_SUBREG for the given \p MI
+ /// and \p DefIdx.
+ /// \p [out] InputReg of the equivalent EXTRACT_SUBREG.
+ /// E.g., EXTRACT_SUBREG vreg1:sub1, sub0, sub1 would produce:
+ /// - vreg1:sub1, sub0
+ ///
+ /// \returns true if it is possible to build such an input sequence
+ /// with the pair \p MI, \p DefIdx. False otherwise.
+ ///
+ /// \pre MI.isExtractSubregLike().
+ bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPairAndIdx &InputReg) const override;
+
+ /// Build the equivalent inputs of a INSERT_SUBREG for the given \p MI
+ /// and \p DefIdx.
+ /// \p [out] BaseReg and \p [out] InsertedReg contain
+ /// the equivalent inputs of INSERT_SUBREG.
+ /// E.g., INSERT_SUBREG vreg0:sub0, vreg1:sub1, sub3 would produce:
+ /// - BaseReg: vreg0:sub0
+ /// - InsertedReg: vreg1:sub1, sub3
+ ///
+ /// \returns true if it is possible to build such an input sequence
+ /// with the pair \p MI, \p DefIdx. False otherwise.
+ ///
+ /// \pre MI.isInsertSubregLike().
+ bool
+ getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPair &BaseReg,
+ RegSubRegPairAndIdx &InsertedReg) const override;
+
+ /// Commutes the operands in the given instruction.
+ /// The commutable operands are specified by their indices OpIdx1 and OpIdx2.
+ ///
+ /// Do not call this method for a non-commutable instruction or for
+ /// non-commutable pair of operand indices OpIdx1 and OpIdx2.
+ /// Even though the instruction is commutable, the method may still
+ /// fail to commute the operands, null pointer is returned in such cases.
+ MachineInstr *commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const override;
+
+public:
+ // Return whether the target has an explicit NOP encoding.
+ bool hasNOP() const;
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
+
+ MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const override;
+
+ virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
+ const ARMSubtarget &getSubtarget() const { return Subtarget; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const override;
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const override;
+
+ // Branch analysis.
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const override;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+ DebugLoc DL) const override;
+
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ // Predication support.
+ bool isPredicated(const MachineInstr *MI) const override;
+
+ ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+ : ARMCC::AL;
+ }
+
+ bool PredicateInstruction(MachineInstr *MI,
+ ArrayRef<MachineOperand> Pred) const override;
+
+ bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const override;
+
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const override;
+
+ bool isPredicable(MachineInstr *MI) const override;
+
+ /// GetInstSize - Returns the size of the specified MachineInstr.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const override;
+ unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const override;
+
+ void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool KillSrc,
+ const ARMSubtarget &Subtarget) const;
+ void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, bool KillSrc,
+ const ARMSubtarget &Subtarget) const;
+
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const override;
+
+ MachineInstr *duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const override;
+
+ const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
+ unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) const;
+
+ bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const override;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+ /// determine if two loads are loading from the same base address. It should
+ /// only return true if the base pointers are the same and the only
+ /// differences between the two addresses is the offset. It also returns the
+ /// offsets by reference.
+ bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1,
+ int64_t &Offset2) const override;
+
+ /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads
+ /// should be scheduled togther. On some targets if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const override;
+
+ bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ BranchProbability Probability) const override;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
+ unsigned ExtraT, MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ BranchProbability Probability) const override;
+
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ BranchProbability Probability) const override {
+ return NumCycles == 1;
+ }
+
+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const override;
+
+ /// analyzeCompare - For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 if having two register operands, and the value it
+ /// compares against in CmpValue. Return true if the comparison instruction
+ /// can be analyzed.
+ bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &CmpMask,
+ int &CmpValue) const override;
+
+ /// optimizeCompareInstr - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero"; Remove a redundant CMP
+ /// instruction if the flags can be updated in the same way by an earlier
+ /// instruction such as SUB.
+ bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const override;
+
+ bool analyzeSelect(const MachineInstr *MI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &TrueOp, unsigned &FalseOp,
+ bool &Optimizable) const override;
+
+ MachineInstr *optimizeSelect(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+ bool) const override;
+
+ /// FoldImmediate - 'Reg' is known to be defined by a move immediate
+ /// instruction, try to fold the immediate into the use instruction.
+ bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const override;
+
+ unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const override;
+
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const override;
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const override;
+
+ /// VFP/NEON execution domains.
+ std::pair<uint16_t, uint16_t>
+ getExecutionDomain(const MachineInstr *MI) const override;
+ void setExecutionDomain(MachineInstr *MI, unsigned Domain) const override;
+
+ unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned,
+ const TargetRegisterInfo*) const override;
+ void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
+ const TargetRegisterInfo *TRI) const override;
+
+ /// Get the number of addresses by LDM or VLDM or zero for unknown.
+ unsigned getNumLDMAddresses(const MachineInstr *MI) const;
+
+private:
+ unsigned getInstBundleLength(const MachineInstr *MI) const;
+
+ int getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefIdx, unsigned DefAlign,
+ const MCInstrDesc &UseMCID,
+ unsigned UseIdx, unsigned UseAlign) const;
+
+ unsigned getPredicationCost(const MachineInstr *MI) const override;
+
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = nullptr) const override;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const override;
+
+ bool hasHighOperandLatency(const TargetSchedModel &SchedModel,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const override;
+ bool hasLowDefLatency(const TargetSchedModel &SchedModel,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const override;
+
+ /// verifyInstruction - Perform target specific instruction verification.
+ bool verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const override;
+
+ virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const = 0;
+
+ void expandMEMCPY(MachineBasicBlock::iterator) const;
+
+private:
+ /// Modeling special VFP / NEON fp MLA / MLS hazards.
+
+ /// MLxEntryMap - Map fp MLA / MLS to the corresponding entry in the internal
+ /// MLx table.
+ DenseMap<unsigned, unsigned> MLxEntryMap;
+
+ /// MLxHazardOpcodes - Set of add / sub and multiply opcodes that would cause
+ /// stalls when scheduled together with fp MLA / MLS opcodes.
+ SmallSet<unsigned, 16> MLxHazardOpcodes;
+
+public:
+ /// isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS
+ /// instruction.
+ bool isFpMLxInstruction(unsigned Opcode) const {
+ return MLxEntryMap.count(Opcode);
+ }
+
+ /// isFpMLxInstruction - This version also returns the multiply opcode and the
+ /// addition / subtraction opcode to expand to. Return true for 'HasLane' for
+ /// the MLX instructions with an extra lane operand.
+ bool isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc, bool &NegAcc,
+ bool &HasLane) const;
+
+ /// canCauseFpMLxStall - Return true if an instruction of the specified opcode
+ /// will cause stalls when scheduled after (within 4-cycle window) a fp
+ /// MLA / MLS instruction.
+ bool canCauseFpMLxStall(unsigned Opcode) const {
+ return MLxHazardOpcodes.count(Opcode);
+ }
+
+ /// Returns true if the instruction has a shift by immediate that can be
+ /// executed in one cycle less.
+ bool isSwiftFastImmShift(const MachineInstr *MI) const;
+};
+
+static inline
+const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
+ return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB,
+ bool isDead = false) {
+ return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead));
+}
+
+static inline
+const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+bool isUncondBranchOpcode(int Opc) {
+ return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;
+}
+
+static inline
+bool isCondBranchOpcode(int Opc) {
+ return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
+}
+
+static inline
+bool isJumpTableBranchOpcode(int Opc) {
+ return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm || Opc == ARM::BR_JTadd ||
+ Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT;
+}
+
+static inline
+bool isIndirectBranchOpcode(int Opc) {
+ return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
+}
+
+static inline bool isPopOpcode(int Opc) {
+ return Opc == ARM::tPOP_RET || Opc == ARM::LDMIA_RET ||
+ Opc == ARM::t2LDMIA_RET || Opc == ARM::tPOP || Opc == ARM::LDMIA_UPD ||
+ Opc == ARM::t2LDMIA_UPD || Opc == ARM::VLDMDIA_UPD;
+}
+
+static inline bool isPushOpcode(int Opc) {
+ return Opc == ARM::tPUSH || Opc == ARM::t2STMDB_UPD ||
+ Opc == ARM::STMDB_UPD || Opc == ARM::VSTMDDB_UPD;
+}
+
+/// getInstrPredicate - If instruction is predicated, returns its predicate
+/// condition, otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+unsigned getMatchingCondBranchOpcode(unsigned Opc);
+
+/// Determine if MI can be folded into an ARM MOVCC instruction, and return the
+/// opcode of the SSA instruction representing the conditional MI.
+unsigned canFoldARMInstrIntoMOVCC(unsigned Reg,
+ MachineInstr *&MI,
+ const MachineRegisterInfo &MRI);
+
+/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
+/// the instruction is encoded with an 'S' bit is determined by the optional
+/// CPSR def operand.
+unsigned convertAddSubFlagsOpcode(unsigned OldOpc);
+
+/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of
+/// instructions to materializea destreg = basereg + immediate in ARM / Thumb2
+/// code.
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
+
+void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags = 0);
+
+/// Tries to add registers to the reglist of a given base-updating
+/// push/pop instruction to adjust the stack by an additional
+/// NumBytes. This can save a few bytes per function in code-size, but
+/// obviously generates more memory traffic. As such, it only takes
+/// effect in functions being optimised for size.
+bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
+ MachineFunction &MF, MachineInstr *MI,
+ unsigned NumBytes);
+
+/// rewriteARMFrameIndex / rewriteT2FrameIndex -
+/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
+/// offset could not be handled directly in MI, and return the left-over
+/// portion by reference.
+bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
new file mode 100644
index 0000000..419717c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -0,0 +1,810 @@
+//===-- ARMBaseRegisterInfo.cpp - ARM Register Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMBaseRegisterInfo.h"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#define DEBUG_TYPE "arm-register-info"
+
+#define GET_REGINFO_TARGET_DESC
+#include "ARMGenRegisterInfo.inc"
+
+using namespace llvm;
+
+ARMBaseRegisterInfo::ARMBaseRegisterInfo()
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {}
+
+static unsigned getFramePointerReg(const ARMSubtarget &STI) {
+ if (STI.isTargetMachO()) {
+ if (STI.isTargetDarwin() || STI.isThumb1Only())
+ return ARM::R7;
+ else
+ return ARM::R11;
+ } else if (STI.isTargetWindows())
+ return ARM::R11;
+ else // ARM EABI
+ return STI.isThumb() ? ARM::R7 : ARM::R11;
+}
+
+const MCPhysReg*
+ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
+ const MCPhysReg *RegList =
+ STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+
+ const Function *F = MF->getFunction();
+ if (F->getCallingConv() == CallingConv::GHC) {
+ // GHC set of callee saved regs is empty as all those regs are
+ // used for passing STG regs around
+ return CSR_NoRegs_SaveList;
+ } else if (F->hasFnAttribute("interrupt")) {
+ if (STI.isMClass()) {
+ // M-class CPUs have hardware which saves the registers needed to allow a
+ // function conforming to the AAPCS to function as a handler.
+ return CSR_AAPCS_SaveList;
+ } else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") {
+ // Fast interrupt mode gives the handler a private copy of R8-R14, so less
+ // need to be saved to restore user-mode state.
+ return CSR_FIQ_SaveList;
+ } else {
+ // Generally only R13-R14 (i.e. SP, LR) are automatically preserved by
+ // exception handling.
+ return CSR_GenericInt_SaveList;
+ }
+ }
+
+ return RegList;
+}
+
+const uint32_t *
+ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (CC == CallingConv::GHC)
+ // This is academic becase all GHC calls are (supposed to be) tail calls
+ return CSR_NoRegs_RegMask;
+ return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
+}
+
+const uint32_t*
+ARMBaseRegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
+const uint32_t *
+ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ // This should return a register mask that is the same as that returned by
+ // getCallPreservedMask but that additionally preserves the register used for
+ // the first i32 argument (which must also be the register used to return a
+ // single i32 return value)
+ //
+ // In case that the calling convention does not use the same register for
+ // both or otherwise does not want to enable this optimization, the function
+ // should return NULL
+ if (CC == CallingConv::GHC)
+ // This is academic becase all GHC calls are (supposed to be) tail calls
+ return nullptr;
+ return STI.isTargetDarwin() ? CSR_iOS_ThisReturn_RegMask
+ : CSR_AAPCS_ThisReturn_RegMask;
+}
+
+BitVector ARMBaseRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
+
+ // FIXME: avoid re-calculating this every time.
+ BitVector Reserved(getNumRegs());
+ Reserved.set(ARM::SP);
+ Reserved.set(ARM::PC);
+ Reserved.set(ARM::FPSCR);
+ Reserved.set(ARM::APSR_NZCV);
+ if (TFI->hasFP(MF))
+ Reserved.set(getFramePointerReg(STI));
+ if (hasBasePointer(MF))
+ Reserved.set(BasePtr);
+ // Some targets reserve R9.
+ if (STI.isR9Reserved())
+ Reserved.set(ARM::R9);
+ // Reserve D16-D31 if the subtarget doesn't support them.
+ if (!STI.hasVFP3() || STI.hasD16()) {
+ assert(ARM::D31 == ARM::D16 + 15);
+ for (unsigned i = 0; i != 16; ++i)
+ Reserved.set(ARM::D16 + i);
+ }
+ const TargetRegisterClass *RC = &ARM::GPRPairRegClass;
+ for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I)
+ for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI)
+ if (Reserved.test(*SI)) Reserved.set(*I);
+
+ return Reserved;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &) const {
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
+ do {
+ switch (Super->getID()) {
+ case ARM::GPRRegClassID:
+ case ARM::SPRRegClassID:
+ case ARM::DPRRegClassID:
+ case ARM::QPRRegClassID:
+ case ARM::QQPRRegClassID:
+ case ARM::QQQQPRRegClassID:
+ case ARM::GPRPairRegClassID:
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
+ return &ARM::GPRRegClass;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &ARM::CCRRegClass)
+ return &ARM::rGPRRegClass; // Can't copy CCR registers.
+ return RC;
+}
+
+unsigned
+ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case ARM::tGPRRegClassID:
+ return TFI->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (STI.isR9Reserved() ? 1 : 0);
+ }
+ case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
+ case ARM::DPRRegClassID:
+ return 32 - 10;
+ }
+}
+
+// Get the other register in a GPRPair.
+static unsigned getPairedGPR(unsigned Reg, bool Odd, const MCRegisterInfo *RI) {
+ for (MCSuperRegIterator Supers(Reg, RI); Supers.isValid(); ++Supers)
+ if (ARM::GPRPairRegClass.contains(*Supers))
+ return RI->getSubReg(*Supers, Odd ? ARM::gsub_1 : ARM::gsub_0);
+ return 0;
+}
+
+// Resolve the RegPairEven / RegPairOdd register allocator hints.
+void
+ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+ unsigned Odd;
+ switch (Hint.first) {
+ case ARMRI::RegPairEven:
+ Odd = 0;
+ break;
+ case ARMRI::RegPairOdd:
+ Odd = 1;
+ break;
+ default:
+ TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
+ return;
+ }
+
+ // This register should preferably be even (Odd == 0) or odd (Odd == 1).
+ // Check if the other part of the pair has already been assigned, and provide
+ // the paired register as the first hint.
+ unsigned Paired = Hint.second;
+ if (Paired == 0)
+ return;
+
+ unsigned PairedPhys = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(Paired)) {
+ PairedPhys = Paired;
+ } else if (VRM && VRM->hasPhys(Paired)) {
+ PairedPhys = getPairedGPR(VRM->getPhys(Paired), Odd, this);
+ }
+
+ // First prefer the paired physreg.
+ if (PairedPhys &&
+ std::find(Order.begin(), Order.end(), PairedPhys) != Order.end())
+ Hints.push_back(PairedPhys);
+
+ // Then prefer even or odd registers.
+ for (unsigned I = 0, E = Order.size(); I != E; ++I) {
+ unsigned Reg = Order[I];
+ if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd)
+ continue;
+ // Don't provide hints that are paired to a reserved register.
+ unsigned Paired = getPairedGPR(Reg, !Odd, this);
+ if (!Paired || MRI.isReserved(Paired))
+ continue;
+ Hints.push_back(Reg);
+ }
+}
+
+void
+ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+ if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
+ Hint.first == (unsigned)ARMRI::RegPairEven) &&
+ TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+ // If 'Reg' is one of the even / odd register pair and it's now changed
+ // (e.g. coalesced) into a different register. The other register of the
+ // pair allocation hint must be updated to reflect the relationship
+ // change.
+ unsigned OtherReg = Hint.second;
+ Hint = MRI->getRegAllocationHint(OtherReg);
+ // Make sure the pair has not already divorced.
+ if (Hint.second == Reg) {
+ MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+ if (TargetRegisterInfo::isVirtualRegister(NewReg))
+ MRI->setRegAllocationHint(NewReg,
+ Hint.first == (unsigned)ARMRI::RegPairOdd ? ARMRI::RegPairEven
+ : ARMRI::RegPairOdd, OtherReg);
+ }
+ }
+}
+
+bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
+
+ // When outgoing call frames are so large that we adjust the stack pointer
+ // around the call, we can no longer use the stack pointer to reach the
+ // emergency spill slot.
+ if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF))
+ return true;
+
+ // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
+ // negative range for ldr/str (255), and thumb1 is positive offsets only.
+ // It's going to be better to use the SP or Base Pointer instead. When there
+ // are variable sized objects, we can't reference off of the SP, so we
+ // reserve a Base Pointer.
+ if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+ // Conservatively estimate whether the negative offset from the frame
+ // pointer will be sufficient to reach. If a function has a smallish
+ // frame, it's less likely to have lots of spills and callee saved
+ // space, so it's all more likely to be within range of the frame pointer.
+ // If it's wrong, the scavenger will still enable access to work, it just
+ // won't be optimal.
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
+ // We can't realign the stack if:
+ // 1. Dynamic stack realignment is explicitly disabled,
+ // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
+ // 3. There are VLAs in the function and the base pointer is disabled.
+ if (!TargetRegisterInfo::canRealignStack(MF))
+ return false;
+ if (AFI->isThumb1OnlyFunction())
+ return false;
+ // Stack realignment requires a frame pointer. If we already started
+ // register allocation with frame pointer elimination, it is too late now.
+ if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget<ARMSubtarget>())))
+ return false;
+ // We may also need a base pointer if there are dynamic allocas or stack
+ // pointer adjustments around calls.
+ if (TFI->hasReservedCallFrame(MF))
+ return true;
+ // A base pointer is required and allowed. Check that it isn't too late to
+ // reserve it.
+ return MRI->canReserveReg(BasePtr);
+}
+
+bool ARMBaseRegisterInfo::
+cannotEliminateFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
+ return true;
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+ || needsStackRealignment(MF);
+}
+
+unsigned
+ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
+
+ if (TFI->hasFP(MF))
+ return getFramePointerReg(STI);
+ return ARM::SP;
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ARMBaseRegisterInfo::
+emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C =
+ ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .addImm(0).addImm(Pred).addReg(PredReg)
+ .setMIFlags(MIFlags);
+}
+
+bool ARMBaseRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return true;
+}
+
+int64_t ARMBaseRegisterInfo::
+getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ int64_t InstrOffs = 0;
+ int Scale = 1;
+ unsigned ImmIdx = 0;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ case ARMII::AddrMode_i12:
+ InstrOffs = MI->getOperand(Idx+1).getImm();
+ Scale = 1;
+ break;
+ case ARMII::AddrMode5: {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI->getOperand(Idx+1);
+ InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrModeT1_s: {
+ ImmIdx = Idx+1;
+ InstrOffs = MI->getOperand(ImmIdx).getImm();
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ return InstrOffs * Scale;
+}
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool ARMBaseRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) {
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores, so
+ // return false for everything else.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::LDRi12: case ARM::LDRH: case ARM::LDRBi12:
+ case ARM::STRi12: case ARM::STRH: case ARM::STRBi12:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2STRi12: case ARM::t2STRi8:
+ case ARM::VLDRS: case ARM::VLDRD:
+ case ARM::VSTRS: case ARM::VSTRD:
+ case ARM::tSTRspi: case ARM::tLDRspi:
+ break;
+ default:
+ return false;
+ }
+
+ // Without a virtual base register, if the function has variable sized
+ // objects, all fixed-size local references will be via the frame pointer,
+ // Approximate the offset and see if it's legal for the instruction.
+ // Note that the incoming offset is based on the SP value at function entry,
+ // so it'll be negative.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Estimate an offset from the frame pointer.
+ // Conservatively assume all callee-saved registers get pushed. R4-R6
+ // will be earlier than the FP, so we ignore those.
+ // R7, LR
+ int64_t FPOffset = Offset - 8;
+ // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15
+ if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction())
+ FPOffset -= 80;
+ // Estimate an offset from the stack pointer.
+ // The incoming offset is relating to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset += MFI->getLocalFrameSize();
+ // Assume that we'll have at least some spill slots allocated.
+ // FIXME: This is a total SWAG number. We should run some statistics
+ // and pick a real one.
+ Offset += 128; // 128 bytes of spill slots
+
+ // If there's a frame pointer and the addressing mode allows it, try using it.
+ // The FP is only available if there is no dynamic realignment. We
+ // don't know for sure yet whether we'll need that, so we guess based
+ // on whether there are any local variables that would trigger it.
+ unsigned StackAlign = TFI->getStackAlignment();
+ if (TFI->hasFP(MF) &&
+ !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset))
+ return false;
+ }
+ // If we can reference via the stack pointer, try that.
+ // FIXME: This (and the code that resolves the references) can be improved
+ // to only disallow SP relative references in the live range of
+ // the VLA(s). In practice, it's unclear how much difference that
+ // would make, but it may be worth doing.
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset))
+ return false;
+
+ // The offset likely isn't legal, we want to allocate a virtual base register.
+ return true;
+}
+
+/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
+void ARMBaseRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const {
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri);
+
+ MachineBasicBlock::iterator Ins = MBB->begin();
+ DebugLoc DL; // Defaults to "unknown"
+ if (Ins != MBB->end())
+ DL = Ins->getDebugLoc();
+
+ const MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const MCInstrDesc &MCID = TII.get(ADDriOpc);
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
+
+ MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
+
+ if (!AFI->isThumb1OnlyFunction())
+ AddDefaultCC(AddDefaultPred(MIB));
+}
+
+void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This resolveFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
+ }
+ assert (Done && "Unable to resolve frame index!");
+ (void)Done;
+}
+
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
+ int64_t Offset) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ unsigned i = 0;
+
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return Offset == 0;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ bool isSigned = true;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative, and i12 supports only positive, so
+ // based on Offset sign, consider the appropriate instruction
+ Scale = 1;
+ if (Offset < 0) {
+ NumBits = 8;
+ Offset = -Offset;
+ } else {
+ NumBits = 12;
+ }
+ break;
+ case ARMII::AddrMode5:
+ // VFP address mode.
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrMode_i12:
+ case ARMII::AddrMode2:
+ NumBits = 12;
+ break;
+ case ARMII::AddrMode3:
+ NumBits = 8;
+ break;
+ case ARMII::AddrModeT1_s:
+ NumBits = (BaseReg == ARM::SP ? 8 : 5);
+ Scale = 4;
+ isSigned = false;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ Offset += getFrameIndexInstrOffset(MI, i);
+ // Make sure the offset is encodable for instructions that scale the
+ // immediate.
+ if ((Offset & (Scale-1)) != 0)
+ return false;
+
+ if (isSigned && Offset < 0)
+ Offset = -Offset;
+
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale)
+ return true;
+
+ return false;
+}
+
+void
+ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateFrameIndex does not support Thumb1!");
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ unsigned FrameReg;
+
+ int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
+
+ // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the
+ // call frame setup/destroy instructions have already been eliminated. That
+ // means the stack pointer cannot be used to access the emergency spill slot
+ // when !hasReservedCallFrame().
+#ifndef NDEBUG
+ if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
+ assert(TFI->hasReservedCallFrame(MF) &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions without a reserved call frame");
+ assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions with variable sized frame objects");
+ }
+#endif // NDEBUG
+
+ assert(!MI.isDebugValue() && "DBG_VALUEs should be handled in target-independent code");
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
+ }
+ if (Done)
+ return;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert((Offset ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
+ "This code isn't needed if offset already handled!");
+
+ unsigned ScratchReg = 0;
+ int PIdx = MI.findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+ unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+ if (Offset == 0)
+ // Must be addrmode4/6.
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false);
+ else {
+ ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass);
+ if (!AFI->isThumbFunction())
+ emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ }
+ // Update the original instruction to use the scratch register.
+ MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true);
+ }
+}
+
+bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC) const {
+ auto MBB = MI->getParent();
+ auto MF = MBB->getParent();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ // If not copying into a sub-register this should be ok because we shouldn't
+ // need to split the reg.
+ if (!DstSubReg)
+ return true;
+ // Small registers don't frequently cause a problem, so we can coalesce them.
+ if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32)
+ return true;
+
+ auto NewRCWeight =
+ MRI.getTargetRegisterInfo()->getRegClassWeight(NewRC);
+ auto SrcRCWeight =
+ MRI.getTargetRegisterInfo()->getRegClassWeight(SrcRC);
+ auto DstRCWeight =
+ MRI.getTargetRegisterInfo()->getRegClassWeight(DstRC);
+ // If the source register class is more expensive than the destination, the
+ // coalescing is probably profitable.
+ if (SrcRCWeight.RegWeight > NewRCWeight.RegWeight)
+ return true;
+ if (DstRCWeight.RegWeight > NewRCWeight.RegWeight)
+ return true;
+
+ // If the register allocator isn't constrained, we can always allow coalescing
+ // unfortunately we don't know yet if we will be constrained.
+ // The goal of this heuristic is to restrict how many expensive registers
+ // we allow to coalesce in a given basic block.
+ auto AFI = MF->getInfo<ARMFunctionInfo>();
+ auto It = AFI->getCoalescedWeight(MBB);
+
+ DEBUG(dbgs() << "\tARM::shouldCoalesce - Coalesced Weight: "
+ << It->second << "\n");
+ DEBUG(dbgs() << "\tARM::shouldCoalesce - Reg Weight: "
+ << NewRCWeight.RegWeight << "\n");
+
+ // This number is the largest round number that which meets the criteria:
+ // (1) addresses PR18825
+ // (2) generates better code in some test cases (like vldm-shed-a9.ll)
+ // (3) Doesn't regress any test cases (in-tree, test-suite, and SPEC)
+ // In practice the SizeMultiplier will only factor in for straight line code
+ // that uses a lot of NEON vectors, which isn't terribly common.
+ unsigned SizeMultiplier = MBB->size()/100;
+ SizeMultiplier = SizeMultiplier ? SizeMultiplier : 1;
+ if (It->second < NewRCWeight.WeightLimit * SizeMultiplier) {
+ It->second += NewRCWeight.RegWeight;
+ return true;
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
new file mode 100644
index 0000000..cea8b80
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -0,0 +1,191 @@
+//===-- ARMBaseRegisterInfo.h - ARM Register Information Impl ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H
+
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "ARMGenRegisterInfo.inc"
+
+namespace llvm {
+/// Register allocation hints.
+namespace ARMRI {
+ enum {
+ RegPairOdd = 1,
+ RegPairEven = 2
+ };
+}
+
+/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
+/// or a stack/pc register that we should push/pop.
+static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ case LR: case SP: case PC:
+ return true;
+ case R8: case R9: case R10: case R11: case R12:
+ // For iOS we want r7 and lr to be next to each other.
+ return !isIOS;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
+ using namespace ARM;
+ switch (Reg) {
+ case R8: case R9: case R10: case R11: case R12:
+ // iOS has this second area.
+ return isIOS;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
+ using namespace ARM;
+ switch (Reg) {
+ case D15: case D14: case D13: case D12:
+ case D11: case D10: case D9: case D8:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool isCalleeSavedRegister(unsigned Reg,
+ const MCPhysReg *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
+protected:
+ /// BasePtr - ARM physical register used as a base ptr in complex stack
+ /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
+ /// variable size stack objects.
+ unsigned BasePtr;
+
+ // Can be only subclassed.
+ explicit ARMBaseRegisterInfo();
+
+ // Return the opcode that implements 'Op', or 0 if no opcode
+ unsigned getOpcode(int Op) const;
+
+public:
+ /// Code Generation virtual methods...
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
+ const uint32_t *getNoPreservedMask() const override;
+
+ /// getThisReturnPreservedMask - Returns a call preserved mask specific to the
+ /// case that 'returned' is on an i32 first argument if the calling convention
+ /// is one that can (partially) model this attribute with a preserved mask
+ /// (i.e. it is a calling convention that uses the same register for the first
+ /// i32 argument and an i32 return value)
+ ///
+ /// Should return NULL in the case that the calling convention does not have
+ /// this property
+ const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind = 0) const override;
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
+
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const override;
+
+ void getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const override;
+
+ void updateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const override;
+
+ bool hasBasePointer(const MachineFunction &MF) const;
+
+ bool canRealignStack(const MachineFunction &MF) const override;
+ int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
+ int Idx) const override;
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const override;
+ void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const override;
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
+ int64_t Offset) const override;
+
+ bool cannotEliminateFrame(const MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
+ unsigned getBaseRegister() const { return BasePtr; }
+
+ bool isLowRegister(unsigned Reg) const;
+
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ virtual void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, unsigned DestReg, unsigned SubIdx,
+ int Val, ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags)const;
+
+ /// Code Generation virtual methods...
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override;
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
+
+ bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = nullptr) const override;
+
+ /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true
+ bool shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
new file mode 100644
index 0000000..a731d00
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -0,0 +1,288 @@
+//=== ARMCallingConv.h - ARM Custom Calling Convention Routines -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the ARM Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H
+#define LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+// APCS f64 is in register pairs, possibly split to stack
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ // Try to get the first register.
+ if (unsigned Reg = State.AllocateReg(RegList))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else {
+ // For the 2nd half of a v2f64, do not fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ // Try to get the second register.
+ if (unsigned Reg = State.AllocateReg(RegList))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4, 4),
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+// AAPCS f64 is in aligned register pairs
+static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
+ static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
+ static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList);
+ if (Reg == 0) {
+
+ // If we had R3 unallocated only, now we still must to waste it.
+ Reg = State.AllocateReg(GPRArgRegs);
+ assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
+
+ // For the 2nd half of a v2f64, do not just fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 8),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo, CCState &State) {
+ static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
+ static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ return true; // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
+static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
+static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+
+
+// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
+// has InConsecutiveRegs set, and that the last member also has
+// InConsecutiveRegsLast set. We must process all members of the HA before
+// we can allocate it, as we need to know the total number of registers that
+// will be needed in order to (attempt to) allocate a contiguous block.
+static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // AAPCS HFAs must have 1-4 elements, all of the same type
+ if (PendingMembers.size() > 0)
+ assert(PendingMembers[0].getLocVT() == LocVT);
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // aggregate. Store the type's required alignmnent as extra info for later: in
+ // the [N x i64] case all trace has been removed by the time we actually get
+ // to do allocation.
+ PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
+ ArgFlags.getOrigAlign()));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ // Try to allocate a contiguous block of registers, each of the correct
+ // size to hold one member.
+ auto &DL = State.getMachineFunction().getDataLayout();
+ unsigned StackAlign = DL.getStackAlignment();
+ unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
+
+ ArrayRef<MCPhysReg> RegList;
+ switch (LocVT.SimpleTy) {
+ case MVT::i32: {
+ RegList = RRegList;
+ unsigned RegIdx = State.getFirstUnallocated(RegList);
+
+ // First consume all registers that would give an unaligned object. Whether
+ // we go on stack or in regs, no-one will be using them in future.
+ unsigned RegAlign = RoundUpToAlignment(Align, 4) / 4;
+ while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
+ State.AllocateReg(RegList[RegIdx++]);
+
+ break;
+ }
+ case MVT::f32:
+ RegList = SRegList;
+ break;
+ case MVT::f64:
+ RegList = DRegList;
+ break;
+ case MVT::v2f64:
+ RegList = QRegList;
+ break;
+ default:
+ llvm_unreachable("Unexpected member type for block aggregate");
+ break;
+ }
+
+ unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
+ if (RegResult) {
+ for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
+ It != PendingMembers.end(); ++It) {
+ It->convertToReg(RegResult);
+ State.addLoc(*It);
+ ++RegResult;
+ }
+ PendingMembers.clear();
+ return true;
+ }
+
+ // Register allocation failed, we'll be needing the stack
+ unsigned Size = LocVT.getSizeInBits() / 8;
+ if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
+ // If nothing else has used the stack until this point, a non-HFA aggregate
+ // can be split between regs and stack.
+ unsigned RegIdx = State.getFirstUnallocated(RegList);
+ for (auto &It : PendingMembers) {
+ if (RegIdx >= RegList.size())
+ It.convertToMem(State.AllocateStack(Size, Size));
+ else
+ It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
+
+ State.addLoc(It);
+ }
+ PendingMembers.clear();
+ return true;
+ } else if (LocVT != MVT::i32)
+ RegList = SRegList;
+
+ // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
+ for (auto Reg : RegList)
+ State.AllocateReg(Reg);
+
+ for (auto &It : PendingMembers) {
+ It.convertToMem(State.AllocateStack(Size, Align));
+ State.addLoc(It);
+
+ // After the first item has been allocated, the rest are packed as tightly
+ // as possible. (E.g. an incoming i64 would have starting Align of 8, but
+ // we'll be allocating a bunch of i32 slots).
+ Align = Size;
+ }
+
+ // All pending members have now been allocated
+ PendingMembers.clear();
+
+ // This will be allocated by the last member of the aggregate
+ return true;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
new file mode 100644
index 0000000..2335164
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -0,0 +1,246 @@
+//===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for ARM architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfAlign - Match of the original alignment of the arg
+class CCIfAlign<string Align, CCAction A>:
+ CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_APCS : CallingConv<[
+
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 4>>
+]>;
+
+def RetCC_ARM_APCS : CallingConv<[
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+def FastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+
+ // CPRCs may be allocated to co-processor registers or the stack - they
+ // may never be allocated to core registers.
+ CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToStackWithShadow<8, 4, [Q0, Q1, Q2, Q3]>>,
+ CCIfType<[v2f64], CCAssignToStackWithShadow<16, 4, [Q0, Q1, Q2, Q3]>>,
+
+ CCDelegateTo<CC_ARM_APCS>
+]>;
+
+def RetFastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_APCS>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for GHC
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_APCS_GHC : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
+ CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
+ CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim
+ CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention, common parts
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_Common : CallingConv<[
+
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // i64/f64 is passed in even pairs of GPRs
+ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
+ // (and the same is true for f64 if VFP is not enabled)
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
+ CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8",
+ CCAssignToReg<[R0, R1, R2, R3]>>>,
+
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, [R0, R1, R2, R3]>>>,
+ CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>,
+ CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>,
+ CCIfType<[v2f64], CCIfAlign<"16",
+ CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>,
+ CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>>
+]>;
+
+def RetCC_ARM_AAPCS_Common : CallingConv<[
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // The 'nest' parameter, if any, is passed in R12.
+ CCIfNest<CCAssignToReg<[R12]>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS-VFP (EABI) Calling Convention
+// Also used for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_VFP : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ // HFAs are passed in a contiguous block of registers, or on the stack
+ CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS_VFP : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
+ (sequence "D%u", 15, 8))>;
+
+// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
+// and the pointer return value are both passed in R0 in these cases, this can
+// be partially modelled by treating R0 as a callee-saved register
+// Only the resulting RegMask is used; the SaveList is ignored
+def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
+ R5, R4, (sequence "D%u", 15, 8),
+ R0)>;
+
+// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
+// Also save R7-R4 first to match the stack frame fixed spill areas.
+def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
+
+def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
+ (sub CSR_AAPCS_ThisReturn, R9))>;
+
+// The "interrupt" attribute is used to generate code that is acceptable in
+// exception-handlers of various kinds. It makes us use a different return
+// instruction (handled elsewhere) and affects which registers we must return to
+// our "caller" in the same state as we receive them.
+
+// For most interrupts, all registers except SP and LR are shared with
+// user-space. We mark LR to be saved anyway, since this is what the ARM backend
+// generally does rather than tracking its liveness as a normal register.
+def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>;
+
+// The fast interrupt handlers have more private state and get their own copies
+// of R8-R12, in addition to SP and LR. As before, mark LR for saving too.
+
+// FIXME: we mark R11 as callee-saved since it's often the frame-pointer, and
+// current frame lowering expects to encounter it while processing callee-saved
+// registers.
+def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>;
+
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 0000000..55c1684
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,2281 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered through-out the function. This is required due to the
+// limited pc-relative displacements that ARM has.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-cp-islands"
+
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+STATISTIC(NumTBs, "Number of table branches generated");
+STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
+STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
+STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
+STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
+STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+
+
+static cl::opt<bool>
+AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Adjust basic block layout to better use TB[BH]"));
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits. This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+namespace {
+ /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
+ /// requires constant pool entries to be scattered among the instructions
+ /// inside a function. To do this, it completely ignores the normal LLVM
+ /// constant pool; instead, it places constants wherever it feels like with
+ /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+ class ARMConstantIslands : public MachineFunctionPass {
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// KnownBits - The number of low bits in Offset that are known to be
+ /// exact. The remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// Unalign - When non-zero, the block contains instructions (inline asm)
+ /// of unknown size. The real size may be smaller than Size bytes by a
+ /// multiple of 1 << Unalign.
+ uint8_t Unalign;
+
+ /// PostAlign - When non-zero, the block terminator contains a .align
+ /// directive, so the end of the block is aligned to 1 << PostAlign
+ /// bytes.
+ uint8_t PostAlign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+ PostAlign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known bits, assume the
+ // worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = countTrailingZeros(Size);
+ return Bits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+ if (!LA)
+ return PO;
+ // Add alignment padding from the terminator.
+ return PO + UnknownPadding(LA, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of know bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(std::max(unsigned(PostAlign), LogAlign),
+ internalKnownBits());
+ }
+ };
+
+ std::vector<BasicBlockInfo> BBInfo;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// NewWaterList - The subset of WaterList that was created since the
+ /// previous iteration by inserting unconditional branches.
+ SmallSet<MachineBasicBlock*, 4> NewWaterList;
+
+ typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP. The HighWaterMark records the
+ /// highest basic block where a new CPEntry can be placed. To ensure this
+ /// pass terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The
+ /// exception to this rule is when the current CP entry for a particular
+ /// CPUser is out of range, but there is another CP entry for the same
+ /// constant value in range. We want to use the existing in-range CP
+ /// entry, but if it later moves out of range, the search for new water
+ /// should resume where it left off. The HighWaterMark is used to record
+ /// that point.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
+ unsigned MaxDisp;
+ bool NegOk;
+ bool IsSoImm;
+ bool KnownAlignment;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
+ bool neg, bool soimm)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm),
+ KnownAlignment(false) {
+ HighWaterMark = CPEMI->getParent();
+ }
+ /// getMaxDisp - Returns the maximum displacement supported by MI.
+ /// Correct for unknown alignment.
+ /// Conservatively subtract 2 bytes to handle weird alignment effects.
+ unsigned getMaxDisp() const {
+ return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2;
+ }
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that existed
+ /// upon entry to this pass), it keeps a vector of entries. Original
+ /// elements are cloned as we go along; the clones are put in the vector of
+ /// the original element, but have distinct CPIs.
+ ///
+ /// The first half of CPEntries contains generic constants, the second half
+ /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
+ /// which vector it will be in here.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// Maps a JT index to the offset in CPEntries containing copies of that
+ /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
+ DenseMap<int, int> JumpTableEntryIndices;
+
+ /// Maps a JT index to the LEA that actually uses the index to calculate its
+ /// base address.
+ DenseMap<int, int> JumpTableUserIndices;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool isCond : 1;
+ unsigned UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr)
+ : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
+ ///
+ SmallVector<MachineInstr*, 4> PushPopMIs;
+
+ /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
+ SmallVector<MachineInstr*, 4> T2JumpTables;
+
+ /// HasFarJump - True if any far jump instruction has been emitted during
+ /// the branch fix up pass.
+ bool HasFarJump;
+
+ MachineFunction *MF;
+ MachineConstantPool *MCP;
+ const ARMBaseInstrInfo *TII;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+ bool isThumb;
+ bool isThumb1;
+ bool isThumb2;
+ public:
+ static char ID;
+ ARMConstantIslands() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "ARM constant island placement and branch shortening pass";
+ }
+
+ private:
+ void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs);
+ void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
+ bool BBHasFallthrough(MachineBasicBlock *MBB);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ void scanFunctionJumpTables();
+ void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+ void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
+ unsigned getCombinedIndex(const MachineInstr *CPEMI);
+ int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
+ bool findAvailableWater(CPUser&U, unsigned UserOffset,
+ water_iterator &WaterIter);
+ void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock *&NewMBB);
+ bool handleConstantPoolUser(unsigned CPUserIndex);
+ void removeDeadCPEMI(MachineInstr *CPEMI);
+ bool removeUnusedCPEntries();
+ bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U, unsigned &Growth);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+ bool fixupUnconditionalBr(ImmBranch &Br);
+ bool undoLRSpillRestore();
+ bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const;
+ bool optimizeThumb2Instructions();
+ bool optimizeThumb2Branches();
+ bool reorderThumb2JumpTables();
+ bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI,
+ unsigned &DeadSize, bool &CanDeleteLEA,
+ bool &BaseRegKill);
+ bool optimizeThumb2JumpTables();
+ MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
+ MachineBasicBlock *JTBB);
+
+ void computeBlockSize(MachineBasicBlock *MBB);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ unsigned getUserOffset(CPUser&) const;
+ void dumpBBs();
+ void verify();
+
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return isOffsetInRange(UserOffset, TrialOffset,
+ U.getMaxDisp(), U.NegOk, U.IsSoImm);
+ }
+ };
+ char ARMConstantIslands::ID = 0;
+}
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void ARMConstantIslands::verify() {
+#ifndef NDEBUG
+ assert(std::is_sorted(MF->begin(), MF->end(),
+ [this](const MachineBasicBlock &LHS,
+ const MachineBasicBlock &RHS) {
+ return BBInfo[LHS.getNumber()].postOffset() <
+ BBInfo[RHS.getNumber()].postOffset();
+ }));
+ DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned UserOffset = getUserOffset(U);
+ // Verify offset using the real max displacement without the safety
+ // adjustment.
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk,
+ /* DoDump = */ true)) {
+ DEBUG(dbgs() << "OK\n");
+ continue;
+ }
+ DEBUG(dbgs() << "Out of range.\n");
+ dumpBBs();
+ DEBUG(MF->dump());
+ llvm_unreachable("Constant pool entry out of range!");
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void ARMConstantIslands::dumpBBs() {
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << " pa=" << unsigned(BBI.PostAlign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
+}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MCP = mf.getConstantPool();
+
+ DEBUG(dbgs() << "***** ARMConstantIslands: "
+ << MCP->getConstants().size() << " CP entries, aligned to "
+ << MCP->getConstantPoolAlignment() << " bytes *****\n");
+
+ STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
+ TII = STI->getInstrInfo();
+ AFI = MF->getInfo<ARMFunctionInfo>();
+
+ isThumb = AFI->isThumbFunction();
+ isThumb1 = AFI->isThumb1OnlyFunction();
+ isThumb2 = AFI->isThumb2Function();
+
+ HasFarJump = false;
+
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ // Try to reorder and otherwise adjust the block layout to make good use
+ // of the TB[BH] instructions.
+ bool MadeChange = false;
+ if (isThumb2 && AdjustJumpTableBlocks) {
+ scanFunctionJumpTables();
+ MadeChange |= reorderThumb2JumpTables();
+ // Data is out of date, so clear it. It'll be re-computed later.
+ T2JumpTables.clear();
+ // Blocks may have shifted around. Keep the numbering up to date.
+ MF->RenumberBlocks();
+ }
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP->isEmpty())
+ doInitialConstPlacement(CPEMIs);
+
+ if (MF->getJumpTableInfo())
+ doInitialJumpTablePlacement(CPEMIs);
+
+ /// The next UID to take is the first unused one.
+ AFI->initPICLabelUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ initializeFunctionInfo(CPEMIs);
+ CPEMIs.clear();
+ DEBUG(dumpBBs());
+
+ // Functions with jump tables need an alignment of 4 because they use the ADR
+ // instruction, which aligns the PC to 4 bytes before adding an offset.
+ if (!T2JumpTables.empty())
+ MF->ensureAlignment(2);
+
+ /// Remove dead constant pool entries.
+ MadeChange |= removeUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ unsigned NoCPIters = 0, NoBRIters = 0;
+ while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
+ bool CPChange = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ CPChange |= handleConstantPoolUser(i);
+ if (CPChange && ++NoCPIters > 30)
+ report_fatal_error("Constant Island pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ report_fatal_error("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!CPChange && !BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // Shrink 32-bit Thumb2 branch, load, and store instructions.
+ if (isThumb2 && !STI->prefers32BitThumb())
+ MadeChange |= optimizeThumb2Instructions();
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify();
+
+ // If LR has been forced spilled and no far jump (i.e. BL) has been issued,
+ // undo the spill / restore of LR if possible.
+ if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
+ MadeChange |= undoLRSpillRestore();
+
+ // Save the mapping between original and cloned constpool entries.
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
+ const CPEntry & CPE = CPEntries[i][j];
+ if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI())
+ AFI->recordCPEClone(i, CPE.CPI);
+ }
+ }
+
+ DEBUG(dbgs() << '\n'; dumpBBs());
+
+ BBInfo.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ JumpTableEntryIndices.clear();
+ JumpTableUserIndices.clear();
+ ImmBranches.clear();
+ PushPopMIs.clear();
+ T2JumpTables.clear();
+
+ return MadeChange;
+}
+
+/// \brief Perform the initial placement of the regular constant pool entries.
+/// To start with, we put them all at the end of the function.
+void
+ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+ // Mark the basic block as required by the const-pool.
+ BB->setAlignment(MaxAlign);
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->ensureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment. We are going to bucket
+ // sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
+
+ const DataLayout &TD = MF->getDataLayout();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ assert(Size >= 4 && "Too small constant pool entry");
+ unsigned Align = CPs[i].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid alignment");
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+ assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2_32(Align);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
+ MachineInstr *CPEMI =
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ CPEMIs.push_back(CPEMI);
+
+ // Ensure that future entries with higher alignment get inserted before
+ // CPEMI. This is bucket sort with iterators.
+ for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ if (InsPoint[a] == InsAt)
+ InsPoint[a] = CPEMI;
+
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ CPEntries.emplace_back(1, CPEntry(CPEMI, i));
+ ++NumCPEs;
+ DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
+ << Size << ", align = " << Align <<'\n');
+ }
+ DEBUG(BB->dump());
+}
+
+/// \brief Do initial placement of the jump tables. Because Thumb2's TBB and TBH
+/// instructions can be made more efficient if the jump table immediately
+/// follows the instruction, it's best to place them immediately next to their
+/// jumps to begin with. In almost all cases they'll never be moved from that
+/// position.
+void ARMConstantIslands::doInitialJumpTablePlacement(
+ std::vector<MachineInstr *> &CPEMIs) {
+ unsigned i = CPEntries.size();
+ auto MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+
+ MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
+ for (MachineBasicBlock &MBB : *MF) {
+ auto MI = MBB.getLastNonDebugInstr();
+ if (MI == MBB.end())
+ continue;
+
+ unsigned JTOpcode;
+ switch (MI->getOpcode()) {
+ default:
+ continue;
+ case ARM::BR_JTadd:
+ case ARM::BR_JTr:
+ case ARM::tBR_JTr:
+ case ARM::BR_JTm:
+ JTOpcode = ARM::JUMPTABLE_ADDRS;
+ break;
+ case ARM::t2BR_JT:
+ JTOpcode = ARM::JUMPTABLE_INSTS;
+ break;
+ case ARM::t2TBB_JT:
+ JTOpcode = ARM::JUMPTABLE_TBB;
+ break;
+ case ARM::t2TBH_JT:
+ JTOpcode = ARM::JUMPTABLE_TBH;
+ break;
+ }
+
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ MachineOperand JTOp =
+ MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
+ unsigned JTI = JTOp.getIndex();
+ unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t);
+ MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock();
+ MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB);
+ MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(),
+ DebugLoc(), TII->get(JTOpcode))
+ .addImm(i++)
+ .addJumpTableIndex(JTI)
+ .addImm(Size);
+ CPEMIs.push_back(CPEMI);
+ CPEntries.emplace_back(1, CPEntry(CPEMI, JTI));
+ JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1));
+ if (!LastCorrectlyNumberedBB)
+ LastCorrectlyNumberedBB = &MBB;
+ }
+
+ // If we did anything then we need to renumber the subsequent blocks.
+ if (LastCorrectlyNumberedBB)
+ MF->RenumberBlocks(LastCorrectlyNumberedBB);
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fallthrough
+/// into the block immediately after it.
+bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB->getIterator();
+ // Can't fall off end of function.
+ if (std::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = &*std::next(MBBI);
+ if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end())
+ return false;
+
+ // Try to analyze the end of the block. A potential fallthrough may already
+ // have an unconditional branch for whatever reason.
+ MachineBasicBlock *TBB, *FBB;
+ SmallVector<MachineOperand, 4> Cond;
+ bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond);
+ return TooDifficult || FBB == nullptr;
+}
+
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+ARMConstantIslands::CPEntry
+*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return nullptr;
+}
+
+/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI. Alignment is measured in log2(bytes) units.
+unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+ switch (CPEMI->getOpcode()) {
+ case ARM::CONSTPOOL_ENTRY:
+ break;
+ case ARM::JUMPTABLE_TBB:
+ return 0;
+ case ARM::JUMPTABLE_TBH:
+ case ARM::JUMPTABLE_INSTS:
+ return 1;
+ case ARM::JUMPTABLE_ADDRS:
+ return 2;
+ default:
+ llvm_unreachable("unknown constpool entry kind");
+ }
+
+ unsigned CPI = getCombinedIndex(CPEMI);
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ unsigned Align = MCP->getConstants()[CPI].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+ return Log2_32(Align);
+}
+
+/// scanFunctionJumpTables - Do a scan of the function, building up
+/// information about the sizes of each block and the locations of all
+/// the jump tables.
+void ARMConstantIslands::scanFunctionJumpTables() {
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(I);
+ }
+}
+
+/// initializeFunctionInfo - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::
+initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineBasicBlock &MBB : *MF)
+ computeBlockSize(&MBB);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ adjustBBOffsetsAfter(&MF->front());
+
+ // Now go back through the instructions and build up our data structures.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+ // 'water' that a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ unsigned Opc = I->getOpcode();
+ if (I->isBranch()) {
+ bool isCond = false;
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = Opc;
+ switch (Opc) {
+ default:
+ continue; // Ignore other JT branches
+ case ARM::t2BR_JT:
+ T2JumpTables.push_back(I);
+ continue; // Does not get an entry in ImmBranches
+ case ARM::Bcc:
+ isCond = true;
+ UOpc = ARM::B;
+ // Fallthrough
+ case ARM::B:
+ Bits = 24;
+ Scale = 4;
+ break;
+ case ARM::tBcc:
+ isCond = true;
+ UOpc = ARM::tB;
+ Bits = 8;
+ Scale = 2;
+ break;
+ case ARM::tB:
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc:
+ isCond = true;
+ UOpc = ARM::t2B;
+ Bits = 20;
+ Scale = 2;
+ break;
+ case ARM::t2B:
+ Bits = 24;
+ Scale = 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ }
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
+ PushPopMIs.push_back(I);
+
+ if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS ||
+ Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB ||
+ Opc == ARM::JUMPTABLE_TBH)
+ continue;
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ bool NegOk = false;
+ bool IsSoImm = false;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown addressing mode for CP reference!");
+
+ // Taking the address of a CP entry.
+ case ARM::LEApcrel:
+ case ARM::LEApcrelJT:
+ // This takes a SoImm, which is 8 bit immediate rotated. We'll
+ // pretend the maximum offset is 255 * 4. Since each instruction
+ // 4 byte wide, this is always correct. We'll check for other
+ // displacements that fits in a SoImm as well.
+ Bits = 8;
+ Scale = 4;
+ NegOk = true;
+ IsSoImm = true;
+ break;
+ case ARM::t2LEApcrel:
+ case ARM::t2LEApcrelJT:
+ Bits = 12;
+ NegOk = true;
+ break;
+ case ARM::tLEApcrel:
+ case ARM::tLEApcrelJT:
+ Bits = 8;
+ Scale = 4;
+ break;
+
+ case ARM::LDRBi12:
+ case ARM::LDRi12:
+ case ARM::LDRcp:
+ case ARM::t2LDRpci:
+ Bits = 12; // +-offset_12
+ NegOk = true;
+ break;
+
+ case ARM::tLDRpci:
+ Bits = 8;
+ Scale = 4; // +(offset_8*4)
+ break;
+
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ Bits = 8;
+ Scale = 4; // +-(offset_8*4)
+ NegOk = true;
+ break;
+ }
+
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getIndex();
+ if (I->getOperand(op).isJTI()) {
+ JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
+ CPI = JumpTableEntryIndices[CPI];
+ }
+
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+ }
+}
+
+/// computeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+ BBI.PostAlign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->GetInstSizeInBytes(I);
+ // For inline asm, GetInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = isThumb ? 1 : 2;
+ // Also consider instructions that may be shrunk later.
+ else if (isThumb && mayOptimizeThumb2Instruction(I))
+ BBI.Unalign = 1;
+ }
+
+ // tBR_JTr contains a .align 2 directive.
+ if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+ BBI.PostAlign = 2;
+ MBB->getParent()->ensureAlignment(2);
+ }
+}
+
+/// getOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->GetInstSizeInBytes(I);
+ }
+ return Offset;
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// updateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+ // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Next, update WaterList. Specifically, we need to add NewMBB as having
+ // available water after it.
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = ++OrigBB->getIterator();
+ MF->insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
+ else
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB)
+ .addImm(ARMCC::AL).addReg(0);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ NewBB->transferSuccessors(OrigBB);
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as updateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Next, update WaterList. Specifically, we need to add OrigMBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(std::next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+ NewWaterList.insert(OrigBB);
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ computeBlockSize(OrigBB);
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ computeBlockSize(NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBBOffsetsAfter(OrigBB);
+
+ return NewBB;
+}
+
+/// getUserOffset - Compute the offset of U.MI as seen by the hardware
+/// displacement computation. Update U.KnownAlignment to match its current
+/// basic block location.
+unsigned ARMConstantIslands::getUserOffset(CPUser &U) const {
+ unsigned UserOffset = getOffsetOf(U.MI);
+ const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
+ unsigned KnownBits = BBI.internalKnownBits();
+
+ // The value read from PC is offset from the actual instruction address.
+ UserOffset += (isThumb ? 4 : 8);
+
+ // Because of inline assembly, we may not know the alignment (mod 4) of U.MI.
+ // Make sure U.getMaxDisp() returns a constrained range.
+ U.KnownAlignment = (KnownBits >= 2);
+
+ // On Thumb, offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // For unknown alignments, getMaxDisp() constrains the range instead.
+ if (isThumb && U.KnownAlignment)
+ UserOffset &= ~3u;
+
+ return UserOffset;
+}
+
+/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
+/// reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+/// UserOffset is computed by getUserOffset above to include PC adjustments. If
+/// the mod 4 alignment of UserOffset is not known, the uncertainty must be
+/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that.
+bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
+ unsigned TrialOffset, unsigned MaxDisp,
+ bool NegativeOK, bool IsSoImm) {
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset - UserOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use full range of soimm values.
+ } else if (NegativeOK) {
+ if (UserOffset - TrialOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use full range of soimm values.
+ }
+ return false;
+}
+
+/// isWaterInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specific MI.
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
+bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water->getIterator();
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+ // that the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return isOffsetInRange(UserOffset, CPEOffset, U);
+}
+
+/// isCPEntryInRange - Returns true if the distance between specific MI and
+/// specific ConstPool entry instruction can fit in MI's displacement field.
+bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned MaxDisp,
+ bool NegOk, bool DoDump) {
+ unsigned CPEOffset = getOffsetOf(CPEMI);
+
+ if (DoDump) {
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
+ }
+
+ return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+}
+
+#ifndef NDEBUG
+/// BBIsJumpedOver - Return true of the specified basic block's only predecessor
+/// unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
+ || PredMI->getOpcode() == ARM::t2B)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif // NDEBUG
+
+void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins. Stop if the offset is already correct,
+ // and we have updated 2 blocks. This is the maximum number of blocks
+ // changed before calling this function.
+ if (i > BBNum + 2 &&
+ BBInfo[i].Offset == Offset &&
+ BBInfo[i].KnownBits == KnownBits)
+ break;
+
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
+ }
+}
+
+/// decrementCPEReferenceCount - find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0 remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+
+bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
+ MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ removeDeadCPEMI(CPEMI);
+ CPE->CPEMI = nullptr;
+ --NumCPEs;
+ return true;
+ }
+ return false;
+}
+
+unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) {
+ if (CPEMI->getOperand(1).isCPI())
+ return CPEMI->getOperand(1).getIndex();
+
+ return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()];
+}
+
+/// LookForCPEntryInRange - see if the currently referenced CPE is in range;
+/// if not, see if an in-range clone of the CPE is in range, and if so,
+/// change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
+ true)) {
+ DEBUG(dbgs() << "In range\n");
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = getCombinedIndex(CPEMI);
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == nullptr)
+ continue;
+ if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
+ U.NegOk)) {
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specific unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ switch (Opc) {
+ case ARM::tB:
+ return ((1<<10)-1)*2;
+ case ARM::t2B:
+ return ((1<<23)-1)*2;
+ default:
+ break;
+ }
+
+ return ((1<<23)-1)*4;
+}
+
+/// findAvailableWater - Look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, WaterIter
+/// is set to the WaterList entry. For Thumb, prefer water that will not
+/// introduce padding to water that will. To ensure that this pass
+/// terminates, the CPE location for a particular CPUser is only allowed to
+/// move to a lower address, so search backward from the end of the list and
+/// prefer the first water that is in range.
+bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
+ water_iterator &WaterIter) {
+ if (WaterList.empty())
+ return false;
+
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();;
+ --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ // Check if water is in range and is either at a lower address than the
+ // current "high water mark" or a new water block that was created since
+ // the previous iteration by inserting an unconditional branch. In the
+ // latter case, we want to allow resetting the high water mark back to
+ // this new water since we haven't seen it before. Inserting branches
+ // should be relatively uncommon and when it does happen, we want to be
+ // sure to take advantage of it for all the CPEs near that block, so that
+ // we don't insert more branches than necessary.
+ unsigned Growth;
+ if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
+ (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
+ NewWaterList.count(WaterBB) || WaterBB == U.MI->getParent()) &&
+ Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
+ return true;
+ }
+ if (IP == B)
+ break;
+ }
+ return BestGrowth != ~0u;
+}
+
+/// createNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
+ unsigned UserOffset,
+ MachineBasicBlock *&NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPELogAlign = getCPELogAlign(CPEMI);
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
+
+ // If the block does not end in an unconditional branch already, and if the
+ // end of the block is within range, make new water there. (The addition
+ // below is for the unconditional branch we will be adding: 4 bytes on ARM +
+ // Thumb2, 2 on Thumb1.
+ if (BBHasFallthrough(UserMBB)) {
+ // Size of branch to insert.
+ unsigned Delta = isThumb1 ? 2 : 4;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;
+
+ if (isOffsetInRange(UserOffset, CPEOffset, U)) {
+ DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = &*++UserMBB->getIterator();
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ else
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
+ .addImm(ARMCC::AL).addReg(0);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ computeBlockSize(UserMBB);
+ adjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
+
+ // What a big block. Find a place within the block to split it. This is a
+ // little tricky on Thumb1 since instructions are 2 bytes and constant pool
+ // entries are 4 bytes: if instruction I references island CPE, and
+ // instruction I+1 references CPE', it will not work well to put CPE as far
+ // forward as possible, since then CPE' cannot immediately follow it (that
+ // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
+ // need to create a new island. So, we make a first guess, then walk through
+ // the instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions that
+ // reference CPEs will be able to use the same island area; if not, we back
+ // up the insertion point.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then align to
+ // LogAlign which is the largest possible alignment in the function.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned KnownBits = UserBBI.internalKnownBits();
+ unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+ // The 4 in the following is for the unconditional branch we'll be inserting
+ // (allows for long branch on Thumb1). Alignment of the island is handled
+ // inside isOffsetInRange.
+ BaseInsertOffset -= 4;
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign
+ << " kb=" << KnownBits
+ << " up=" << UPad << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
+ // Ensure BaseInsertOffset is larger than the offset of the instruction
+ // following UserMI so that the loop which searches for the split point
+ // iterates at least once.
+ BaseInsertOffset =
+ std::max(UserBBI.postOffset() - UPad - 8,
+ UserOffset + TII->GetInstSizeInBytes(UserMI) + 1);
+ DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
+ }
+ unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = nullptr;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) {
+ assert(MI != UserMBB->end() && "Fell off end of block");
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
+ // Shift intertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset += U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
+
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
+ }
+
+ // We really must not split an IT block.
+ DEBUG(unsigned PredReg;
+ assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL));
+
+ NewMBB = splitBlockBeforeInstr(MI);
+}
+
+/// handleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = getCombinedIndex(CPEMI);
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ // Compute this only once, it's expensive.
+ unsigned UserOffset = getUserOffset(U);
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+ int result = findInRangeCPEntry(U, UserOffset);
+ if (result==1) return false;
+ else if (result==2) return true;
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = AFI->createPICLabelUId();
+
+ // Look for water where we can place this CPE.
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *NewMBB;
+ water_iterator IP;
+ if (findAvailableWater(U, UserOffset, IP)) {
+ DEBUG(dbgs() << "Found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
+
+ // If the original WaterList entry was "new water" on this iteration,
+ // propagate that to the new island. This is just keeping NewWaterList
+ // updated to match the WaterList, which will be updated below.
+ if (NewWaterList.erase(WaterBB))
+ NewWaterList.insert(NewIsland);
+
+ // The new CPE goes before the following block (NewMBB).
+ NewMBB = &*++WaterBB->getIterator();
+ } else {
+ // No water found.
+ DEBUG(dbgs() << "No water found\n");
+ createNewWater(CPUserIndex, UserOffset, NewMBB);
+
+ // splitBlockBeforeInstr adds to WaterList, which is important when it is
+ // called while handling branches so that the water will be seen on the
+ // next iteration for constant pools, but in this context, we don't want
+ // it. Check for this so it will be removed from the WaterList.
+ // Also remove any entry from NewWaterList.
+ MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
+ IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ if (IP != WaterList.end())
+ NewWaterList.erase(WaterBB);
+
+ // We are adding new water. Update NewWaterList.
+ NewWaterList.insert(NewIsland);
+ }
+
+ // Remove the original WaterList entry; we want subsequent insertions in
+ // this vicinity to go after the one we're about to insert. This
+ // considerably reduces the number of times we have to move the same CPE
+ // more than once and is also important to ensure the algorithm terminates.
+ if (IP != WaterList.end())
+ WaterList.erase(IP);
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MF->insert(NewMBB->getIterator(), NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ updateForInsertedWaterBlock(NewIsland);
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.HighWaterMark = NewIsland;
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
+ .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ ++NumCPEs;
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ decrementCPEReferenceCount(CPI, CPEMI);
+
+ // Mark the basic block as aligned as required by the const-pool entry.
+ NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+
+ // Increase the size of the island block to account for the new entry.
+ BBInfo[NewIsland->getNumber()].Size += Size;
+ adjustBBOffsetsAfter(&*--NewIsland->getIterator());
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isCPI()) {
+ UserMI->getOperand(i).setIndex(ID);
+ break;
+ }
+
+ DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
+
+ return true;
+}
+
+/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBInfo[CPEBB->getNumber()].Size -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ adjustBBOffsetsAfter(CPEBB);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// removeUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool ARMConstantIslands::removeUnusedCPEntries() {
+ unsigned MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ removeDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = nullptr;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// isBBInRange - Returns true if the distance between specific MI and
+/// specific BB can fit in MI's displacement field.
+bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
+ unsigned MaxDisp) {
+ unsigned PCAdj = isThumb ? 4 : 8;
+ unsigned BrOffset = getOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Check to see if the DestBB is already in-range.
+ if (isBBInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.isCond)
+ return fixupUnconditionalBr(Br);
+ return fixupConditionalBr(Br);
+}
+
+/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. If the LR register has been
+/// spilled in the epilogue, then we can use BL to implement a far jump.
+/// Otherwise, add an intermediate branch instruction to a branch.
+bool
+ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (!isThumb1)
+ llvm_unreachable("fixupUnconditionalBr is Thumb1 only!");
+
+ // Use BL to implement far jump.
+ Br.MaxDisp = (1 << 21) * 2;
+ MI->setDesc(TII->get(ARM::tBfar));
+ BBInfo[MBB->getNumber()].Size += 2;
+ adjustBBOffsetsAfter(MBB);
+ HasFarJump = true;
+ ++NumUBrFixed;
+
+ DEBUG(dbgs() << " Changed B to long jump " << *MI);
+
+ return true;
+}
+
+/// fixupConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // blt L1
+ // =>
+ // bge L2
+ // b L1
+ // L2:
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
+ CC = ARMCC::getOppositeCondition(CC);
+ unsigned CCReg = MI->getOperand(2).getReg();
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block. Otherwise,
+ // split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
+ BMI->getOpcode() == Br.UncondBr) {
+ // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(0).setMBB(NewDest);
+ MI->getOperand(1).setImm(CC);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ splitBlockBeforeInstr(MI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size -= delta;
+ MBB->back().eraseFromParent();
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
+ }
+ MachineBasicBlock *NextBB = &*++MBB->getIterator();
+
+ DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
+ .addMBB(NextBB).addImm(CC).addReg(CCReg);
+ Br.MI = &MBB->back();
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+ if (isThumb)
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
+ .addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+ unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+ ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+ // Remove the old conditional branch. It may or may not still be in MBB.
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+ adjustBBOffsetsAfter(MBB);
+ return true;
+}
+
+/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spills
+/// LR / restores LR to pc. FIXME: This is done here because it's only possible
+/// to do this if tBfar is not used.
+bool ARMConstantIslands::undoLRSpillRestore() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
+ MachineInstr *MI = PushPopMIs[i];
+ // First two operands are predicates.
+ if (MI->getOpcode() == ARM::tPOP_RET &&
+ MI->getOperand(2).getReg() == ARM::PC &&
+ MI->getNumExplicitOperands() == 3) {
+ // Create the new insn and copy the predicate from the old.
+ BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1));
+ MI->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
+
+// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
+// below may shrink MI.
+bool
+ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const {
+ switch(MI->getOpcode()) {
+ // optimizeThumb2Instructions.
+ case ARM::t2LEApcrel:
+ case ARM::t2LDRpci:
+ // optimizeThumb2Branches.
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ case ARM::tBcc:
+ // optimizeThumb2JumpTables.
+ case ARM::t2BR_JT:
+ return true;
+ }
+ return false;
+}
+
+bool ARMConstantIslands::optimizeThumb2Instructions() {
+ bool MadeChange = false;
+
+ // Shrink ADR and LDR from constantpool.
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned Opcode = U.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2LEApcrel:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLEApcrel;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ case ARM::t2LDRpci:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLDRpci;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ }
+
+ if (!NewOpc)
+ continue;
+
+ unsigned UserOffset = getUserOffset(U);
+ unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
+
+ // Be conservative with inline asm.
+ if (!U.KnownAlignment)
+ MaxOffs -= 2;
+
+ // FIXME: Check if offset is multiple of scale if scale is not 4.
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ DEBUG(dbgs() << "Shrink: " << *U.MI);
+ U.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = U.MI->getParent();
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
+ ++NumT2CPShrunk;
+ MadeChange = true;
+ }
+ }
+
+ MadeChange |= optimizeThumb2Branches();
+ MadeChange |= optimizeThumb2JumpTables();
+ return MadeChange;
+}
+
+bool ARMConstantIslands::optimizeThumb2Branches() {
+ bool MadeChange = false;
+
+ // The order in which branches appear in ImmBranches is approximately their
+ // order within the function body. By visiting later branches first, we reduce
+ // the distance between earlier forward branches and their targets, making it
+ // more likely that the cbn?z optimization, which can only apply to forward
+ // branches, will succeed.
+ for (unsigned i = ImmBranches.size(); i != 0; --i) {
+ ImmBranch &Br = ImmBranches[i-1];
+ unsigned Opcode = Br.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2B:
+ NewOpc = ARM::tB;
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc: {
+ NewOpc = ARM::tBcc;
+ Bits = 8;
+ Scale = 2;
+ break;
+ }
+ }
+ if (NewOpc) {
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ if (isBBInRange(Br.MI, DestBB, MaxOffs)) {
+ DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
+ Br.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
+ ++NumT2BrShrunk;
+ MadeChange = true;
+ }
+ }
+
+ Opcode = Br.MI->getOpcode();
+ if (Opcode != ARM::tBcc)
+ continue;
+
+ // If the conditional branch doesn't kill CPSR, then CPSR can be liveout
+ // so this transformation is not safe.
+ if (!Br.MI->killsRegister(ARM::CPSR))
+ continue;
+
+ NewOpc = 0;
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg);
+ if (Pred == ARMCC::EQ)
+ NewOpc = ARM::tCBZ;
+ else if (Pred == ARMCC::NE)
+ NewOpc = ARM::tCBNZ;
+ if (!NewOpc)
+ continue;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ // Check if the distance is within 126. Subtract starting offset by 2
+ // because the cmp will be eliminated.
+ unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
+ if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
+ MachineBasicBlock::iterator CmpMI = Br.MI;
+ if (CmpMI != Br.MI->getParent()->begin()) {
+ --CmpMI;
+ if (CmpMI->getOpcode() == ARM::tCMPi8) {
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ Pred = getInstrPredicate(CmpMI, PredReg);
+ if (Pred == ARMCC::AL &&
+ CmpMI->getOperand(1).getImm() == 0 &&
+ isARMLowRegister(Reg)) {
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
+ MachineInstr *NewBR =
+ BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
+ CmpMI->eraseFromParent();
+ Br.MI->eraseFromParent();
+ Br.MI = NewBR;
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
+ ++NumCBZ;
+ MadeChange = true;
+ }
+ }
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg,
+ unsigned BaseReg) {
+ if (I.getOpcode() != ARM::t2ADDrs)
+ return false;
+
+ if (I.getOperand(0).getReg() != EntryReg)
+ return false;
+
+ if (I.getOperand(1).getReg() != BaseReg)
+ return false;
+
+ // FIXME: what about CC and IdxReg?
+ return true;
+}
+
+/// \brief While trying to form a TBB/TBH instruction, we may (if the table
+/// doesn't immediately follow the BR_JT) need access to the start of the
+/// jump-table. We know one instruction that produces such a register; this
+/// function works out whether that definition can be preserved to the BR_JT,
+/// possibly by removing an intervening addition (which is usually needed to
+/// calculate the actual entry to jump to).
+bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
+ MachineInstr *LEAMI,
+ unsigned &DeadSize,
+ bool &CanDeleteLEA,
+ bool &BaseRegKill) {
+ if (JumpMI->getParent() != LEAMI->getParent())
+ return false;
+
+ // Now we hope that we have at least these instructions in the basic block:
+ // BaseReg = t2LEA ...
+ // [...]
+ // EntryReg = t2ADDrs BaseReg, ...
+ // [...]
+ // t2BR_JT EntryReg
+ //
+ // We have to be very conservative about what we recognise here though. The
+ // main perturbing factors to watch out for are:
+ // + Spills at any point in the chain: not direct problems but we would
+ // expect a blocking Def of the spilled register so in practice what we
+ // can do is limited.
+ // + EntryReg == BaseReg: this is the one situation we should allow a Def
+ // of BaseReg, but only if the t2ADDrs can be removed.
+ // + Some instruction other than t2ADDrs computing the entry. Not seen in
+ // the wild, but we should be careful.
+ unsigned EntryReg = JumpMI->getOperand(0).getReg();
+ unsigned BaseReg = LEAMI->getOperand(0).getReg();
+
+ CanDeleteLEA = true;
+ BaseRegKill = false;
+ MachineInstr *RemovableAdd = nullptr;
+ MachineBasicBlock::iterator I(LEAMI);
+ for (++I; &*I != JumpMI; ++I) {
+ if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) {
+ RemovableAdd = &*I;
+ break;
+ }
+
+ for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
+ const MachineOperand &MO = I->getOperand(K);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == BaseReg)
+ return false;
+ if (MO.isUse() && MO.getReg() == BaseReg) {
+ BaseRegKill = BaseRegKill || MO.isKill();
+ CanDeleteLEA = false;
+ }
+ }
+ }
+
+ if (!RemovableAdd)
+ return true;
+
+ // Check the add really is removable, and that nothing else in the block
+ // clobbers BaseReg.
+ for (++I; &*I != JumpMI; ++I) {
+ for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
+ const MachineOperand &MO = I->getOperand(K);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == BaseReg)
+ return false;
+ if (MO.isUse() && MO.getReg() == EntryReg)
+ RemovableAdd = nullptr;
+ }
+ }
+
+ if (RemovableAdd) {
+ RemovableAdd->eraseFromParent();
+ DeadSize += 4;
+ } else if (BaseReg == EntryReg) {
+ // The add wasn't removable, but clobbered the base for the TBB. So we can't
+ // preserve it.
+ return false;
+ }
+
+ // We reached the end of the block without seeing another definition of
+ // BaseReg (except, possibly the t2ADDrs, which was removed). BaseReg can be
+ // used in the TBB/TBH if necessary.
+ return true;
+}
+
+/// \brief Returns whether CPEMI is the first instruction in the block
+/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
+/// we can switch the first register to PC and usually remove the address
+/// calculation that preceded it.
+static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
+ MachineFunction::iterator MBB = JTMI->getParent()->getIterator();
+ MachineFunction *MF = MBB->getParent();
+ ++MBB;
+
+ return MBB != MF->end() && MBB->begin() != MBB->end() &&
+ &*MBB->begin() == CPEMI;
+}
+
+/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
+/// jumptables when it's possible.
+bool ARMConstantIslands::optimizeThumb2JumpTables() {
+ bool MadeChange = false;
+
+ // FIXME: After the tables are shrunk, can we get rid some of the
+ // constantpool tables?
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (!MJTI) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ bool ByteOk = true;
+ bool HalfWordOk = true;
+ unsigned JTOffset = getOffsetOf(MI) + 4;
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
+ // Negative offset is not ok. FIXME: We should change BB layout to make
+ // sure all the branches are forward.
+ if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
+ ByteOk = false;
+ unsigned TBHLimit = ((1<<16)-1)*2;
+ if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
+ HalfWordOk = false;
+ if (!ByteOk && !HalfWordOk)
+ break;
+ }
+
+ if (!ByteOk && !HalfWordOk)
+ continue;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ if (!MI->getOperand(0).isKill()) // FIXME: needed now?
+ continue;
+ unsigned IdxReg = MI->getOperand(1).getReg();
+ bool IdxRegKill = MI->getOperand(1).isKill();
+
+ CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
+ unsigned DeadSize = 0;
+ bool CanDeleteLEA = false;
+ bool BaseRegKill = false;
+ bool PreservedBaseReg =
+ preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
+
+ if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
+ continue;
+
+ DEBUG(dbgs() << "Shrink JT: " << *MI);
+ MachineInstr *CPEMI = User.CPEMI;
+ unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+ MachineBasicBlock::iterator MI_JT = MI;
+ MachineInstr *NewJTMI =
+ BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
+ .addReg(User.MI->getOperand(0).getReg(),
+ getKillRegState(BaseRegKill))
+ .addReg(IdxReg, getKillRegState(IdxRegKill))
+ .addJumpTableIndex(JTI, JTOP.getTargetFlags())
+ .addImm(CPEMI->getOperand(0).getImm());
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
+
+ unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH;
+ CPEMI->setDesc(TII->get(JTOpc));
+
+ if (jumpTableFollowsTB(MI, User.CPEMI)) {
+ NewJTMI->getOperand(0).setReg(ARM::PC);
+ NewJTMI->getOperand(0).setIsKill(false);
+
+ if (CanDeleteLEA) {
+ User.MI->eraseFromParent();
+ DeadSize += 4;
+
+ // The LEA was eliminated, the TBB instruction becomes the only new user
+ // of the jump table.
+ User.MI = NewJTMI;
+ User.MaxDisp = 4;
+ User.NegOk = false;
+ User.IsSoImm = false;
+ User.KnownAlignment = false;
+ } else {
+ // The LEA couldn't be eliminated, so we must add another CPUser to
+ // record the TBB or TBH use.
+ int CPEntryIdx = JumpTableEntryIndices[JTI];
+ auto &CPEs = CPEntries[CPEntryIdx];
+ auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) {
+ return E.CPEMI == User.CPEMI;
+ });
+ ++Entry->RefCount;
+ CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false));
+ }
+ }
+
+ unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+
+ int Delta = OrigSize - NewSize + DeadSize;
+ BBInfo[MBB->getNumber()].Size -= Delta;
+ adjustBBOffsetsAfter(MBB);
+
+ ++NumTBs;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// jump tables always branch forwards, since that's what tbb and tbh need.
+bool ARMConstantIslands::reorderThumb2JumpTables() {
+ bool MadeChange = false;
+
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (!MJTI) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ // We prefer if target blocks for the jump table come after the jump
+ // instruction so we can use TB[BH]. Loop through the target blocks
+ // and try to adjust them such that that's true.
+ int JTNumber = MI->getParent()->getNumber();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ int DTNumber = MBB->getNumber();
+
+ if (DTNumber < JTNumber) {
+ // The destination precedes the switch. Try to move the block forward
+ // so we have a positive offset.
+ MachineBasicBlock *NewBB =
+ adjustJTTargetBlockForward(MBB, MI->getParent());
+ if (NewBB)
+ MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+MachineBasicBlock *ARMConstantIslands::
+adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
+ // If the destination block is terminated by an unconditional branch,
+ // try to move it; otherwise, create a new block following the jump
+ // table that branches back to the actual target. This is a very simple
+ // heuristic. FIXME: We can definitely improve it.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineOperand, 4> CondPrior;
+ MachineFunction::iterator BBi = BB->getIterator();
+ MachineFunction::iterator OldPrior = std::prev(BBi);
+
+ // If the block terminator isn't analyzable, don't try to move the block
+ bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
+
+ // If the block ends in an unconditional branch, move it. The prior block
+ // has to have an analyzable terminator for us to move this one. Be paranoid
+ // and make sure we're not trying to move the entry block of the function.
+ if (!B && Cond.empty() && BB != MF->begin() &&
+ !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
+ BB->moveAfter(JTBB);
+ OldPrior->updateTerminator();
+ BB->updateTerminator();
+ // Update numbering to account for the block being moved.
+ MF->RenumberBlocks();
+ ++NumJTMoved;
+ return nullptr;
+ }
+
+ // Create a new MBB for the code after the jump BB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MachineFunction::iterator MBBI = ++JTBB->getIterator();
+ MF->insert(MBBI, NewBB);
+
+ // Add an unconditional branch from NewBB to BB.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond directly to anything in the source.
+ assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB)
+ .addImm(ARMCC::AL).addReg(0);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF->RenumberBlocks(NewBB);
+
+ // Update the CFG.
+ NewBB->addSuccessor(BB);
+ JTBB->replaceSuccessor(BB, NewBB);
+
+ ++NumJTInserted;
+ return NewBB;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
new file mode 100644
index 0000000..c9849b2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -0,0 +1,261 @@
+//===-- ARMConstantPoolValue.cpp - ARM constantpool value -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolValue
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolValue::ARMConstantPoolValue(Type *Ty, unsigned id,
+ ARMCP::ARMCPKind kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier modifier,
+ bool addCurrentAddress)
+ : MachineConstantPoolValue(Ty), LabelId(id), Kind(kind),
+ PCAdjust(PCAdj), Modifier(modifier),
+ AddCurrentAddress(addCurrentAddress) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id,
+ ARMCP::ARMCPKind kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier modifier,
+ bool addCurrentAddress)
+ : MachineConstantPoolValue((Type*)Type::getInt32Ty(C)),
+ LabelId(id), Kind(kind), PCAdjust(PCAdj), Modifier(modifier),
+ AddCurrentAddress(addCurrentAddress) {}
+
+ARMConstantPoolValue::~ARMConstantPoolValue() {}
+
+const char *ARMConstantPoolValue::getModifierText() const {
+ switch (Modifier) {
+ // FIXME: Are these case sensitive? It'd be nice to lower-case all the
+ // strings if that's legal.
+ case ARMCP::no_modifier: return "none";
+ case ARMCP::TLSGD: return "tlsgd";
+ case ARMCP::GOT_PREL: return "GOT_PREL";
+ case ARMCP::GOTTPOFF: return "gottpoff";
+ case ARMCP::TPOFF: return "tpoff";
+ }
+ llvm_unreachable("Unknown modifier!");
+}
+
+int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ llvm_unreachable("Shouldn't be calling this directly!");
+}
+
+void
+ARMConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddInteger(LabelId);
+ ID.AddInteger(PCAdjust);
+}
+
+bool
+ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
+ if (ACPV->Kind == Kind &&
+ ACPV->PCAdjust == PCAdjust &&
+ ACPV->Modifier == Modifier) {
+ if (ACPV->LabelId == LabelId)
+ return true;
+ // Two PC relative constpool entries containing the same GV address or
+ // external symbols. FIXME: What about blockaddress?
+ if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol)
+ return true;
+ }
+ return false;
+}
+
+void ARMConstantPoolValue::dump() const {
+ errs() << " " << *this;
+}
+
+void ARMConstantPoolValue::print(raw_ostream &O) const {
+ if (Modifier) O << "(" << getModifierText() << ")";
+ if (PCAdjust != 0) {
+ O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
+ if (AddCurrentAddress) O << "-.";
+ O << ")";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolConstant
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolConstant::ARMConstantPoolConstant(Type *Ty,
+ const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(Ty, ID, Kind, PCAdj, Modifier, AddCurrentAddress),
+ CVal(C) {}
+
+ARMConstantPoolConstant::ARMConstantPoolConstant(const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue((Type*)C->getType(), ID, Kind, PCAdj, Modifier,
+ AddCurrentAddress),
+ CVal(C) {}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) {
+ return new ARMConstantPoolConstant(C, ID, ARMCP::CPValue, 0,
+ ARMCP::no_modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const GlobalValue *GV,
+ ARMCP::ARMCPModifier Modifier) {
+ return new ARMConstantPoolConstant((Type*)Type::getInt32Ty(GV->getContext()),
+ GV, 0, ARMCP::CPValue, 0,
+ Modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind, unsigned char PCAdj) {
+ return new ARMConstantPoolConstant(C, ID, Kind, PCAdj,
+ ARMCP::no_modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind, unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress) {
+ return new ARMConstantPoolConstant(C, ID, Kind, PCAdj, Modifier,
+ AddCurrentAddress);
+}
+
+const GlobalValue *ARMConstantPoolConstant::getGV() const {
+ return dyn_cast_or_null<GlobalValue>(CVal);
+}
+
+const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const {
+ return dyn_cast_or_null<BlockAddress>(CVal);
+}
+
+int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ return getExistingMachineCPValueImpl<ARMConstantPoolConstant>(CP, Alignment);
+}
+
+bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolConstant *ACPC = dyn_cast<ARMConstantPoolConstant>(ACPV);
+ return ACPC && ACPC->CVal == CVal && ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolConstant::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(CVal);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolConstant::print(raw_ostream &O) const {
+ O << CVal->getName();
+ ARMConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolSymbol
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s,
+ unsigned id,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
+ AddCurrentAddress),
+ S(s) {}
+
+ARMConstantPoolSymbol *
+ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
+ unsigned ID, unsigned char PCAdj) {
+ return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false);
+}
+
+int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ return getExistingMachineCPValueImpl<ARMConstantPoolSymbol>(CP, Alignment);
+}
+
+bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV);
+ return ACPS && ACPS->S == S && ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddString(S);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolSymbol::print(raw_ostream &O) const {
+ O << S;
+ ARMConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolMBB
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolMBB::ARMConstantPoolMBB(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned id, unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(C, id, ARMCP::CPMachineBasicBlock, PCAdj,
+ Modifier, AddCurrentAddress),
+ MBB(mbb) {}
+
+ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned ID,
+ unsigned char PCAdj) {
+ return new ARMConstantPoolMBB(C, mbb, ID, PCAdj, ARMCP::no_modifier, false);
+}
+
+int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ return getExistingMachineCPValueImpl<ARMConstantPoolMBB>(CP, Alignment);
+}
+
+bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolMBB *ACPMBB = dyn_cast<ARMConstantPoolMBB>(ACPV);
+ return ACPMBB && ACPMBB->MBB == MBB &&
+ ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(MBB);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolMBB::print(raw_ostream &O) const {
+ O << "BB#" << MBB->getNumber();
+ ARMConstantPoolValue::print(O);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
new file mode 100644
index 0000000..6b18a4e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -0,0 +1,256 @@
+//===-- ARMConstantPoolValue.h - ARM constantpool value ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
+#define LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+
+namespace llvm {
+
+class BlockAddress;
+class Constant;
+class GlobalValue;
+class LLVMContext;
+class MachineBasicBlock;
+
+namespace ARMCP {
+ enum ARMCPKind {
+ CPValue,
+ CPExtSymbol,
+ CPBlockAddress,
+ CPLSDA,
+ CPMachineBasicBlock
+ };
+
+ enum ARMCPModifier {
+ no_modifier,
+ TLSGD,
+ GOT_PREL,
+ GOTTPOFF,
+ TPOFF
+ };
+}
+
+/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
+/// represent PC-relative displacement between the address of the load
+/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)).
+class ARMConstantPoolValue : public MachineConstantPoolValue {
+ unsigned LabelId; // Label id of the load.
+ ARMCP::ARMCPKind Kind; // Kind of constant.
+ unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative.
+ // 8 for ARM, 4 for Thumb.
+ ARMCP::ARMCPModifier Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
+ bool AddCurrentAddress;
+
+protected:
+ ARMConstantPoolValue(Type *Ty, unsigned id, ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+ ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+ template <typename Derived>
+ int getExistingMachineCPValueImpl(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ if (Derived *APC = dyn_cast<Derived>(CPV))
+ if (cast<Derived>(this)->equals(APC))
+ return i;
+ }
+ }
+
+ return -1;
+ }
+
+public:
+ ~ARMConstantPoolValue() override;
+
+ ARMCP::ARMCPModifier getModifier() const { return Modifier; }
+ const char *getModifierText() const;
+ bool hasModifier() const { return Modifier != ARMCP::no_modifier; }
+
+ bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+
+ unsigned getLabelId() const { return LabelId; }
+ unsigned char getPCAdjustment() const { return PCAdjust; }
+
+ bool isGlobalValue() const { return Kind == ARMCP::CPValue; }
+ bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; }
+ bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; }
+ bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
+ bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
+
+ int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) override;
+
+ void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ bool equals(const ARMConstantPoolValue *A) const {
+ return this->LabelId == A->LabelId &&
+ this->PCAdjust == A->PCAdjust &&
+ this->Modifier == A->Modifier;
+ }
+
+ void print(raw_ostream &O) const override;
+ void print(raw_ostream *O) const { if (O) print(*O); }
+ void dump() const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
+ V.print(O);
+ return O;
+}
+
+/// ARMConstantPoolConstant - ARM-specific constant pool values for Constants,
+/// Functions, and BlockAddresses.
+class ARMConstantPoolConstant : public ARMConstantPoolValue {
+ const Constant *CVal; // Constant being loaded.
+
+ ARMConstantPoolConstant(const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+ ARMConstantPoolConstant(Type *Ty, const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID);
+ static ARMConstantPoolConstant *Create(const GlobalValue *GV,
+ ARMCP::ARMCPModifier Modifier);
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj);
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+ const GlobalValue *getGV() const;
+ const BlockAddress *getBlockAddress() const;
+
+ int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) override;
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ bool hasSameValue(ARMConstantPoolValue *ACPV) override;
+
+ void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
+
+ void print(raw_ostream &O) const override;
+ static bool classof(const ARMConstantPoolValue *APV) {
+ return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
+ }
+
+ bool equals(const ARMConstantPoolConstant *A) const {
+ return CVal == A->CVal && ARMConstantPoolValue::equals(A);
+ }
+};
+
+/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
+/// symbols.
+class ARMConstantPoolSymbol : public ARMConstantPoolValue {
+ const std::string S; // ExtSymbol being loaded.
+
+ ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s,
+ unsigned ID, unsigned char PCAdj);
+
+ const char *getSymbol() const { return S.c_str(); }
+
+ int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) override;
+
+ void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ bool hasSameValue(ARMConstantPoolValue *ACPV) override;
+
+ void print(raw_ostream &O) const override;
+
+ static bool classof(const ARMConstantPoolValue *ACPV) {
+ return ACPV->isExtSymbol();
+ }
+
+ bool equals(const ARMConstantPoolSymbol *A) const {
+ return S == A->S && ARMConstantPoolValue::equals(A);
+ }
+};
+
+/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
+/// block.
+class ARMConstantPoolMBB : public ARMConstantPoolValue {
+ const MachineBasicBlock *MBB; // Machine basic block.
+
+ ARMConstantPoolMBB(LLVMContext &C, const MachineBasicBlock *mbb, unsigned id,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static ARMConstantPoolMBB *Create(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned ID, unsigned char PCAdj);
+
+ const MachineBasicBlock *getMBB() const { return MBB; }
+
+ int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) override;
+
+ void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ bool hasSameValue(ARMConstantPoolValue *ACPV) override;
+
+ void print(raw_ostream &O) const override;
+
+ static bool classof(const ARMConstantPoolValue *ACPV) {
+ return ACPV->isMachineBasicBlock();
+ }
+
+ bool equals(const ARMConstantPoolMBB *A) const {
+ return MBB == A->MBB && ARMConstantPoolValue::equals(A);
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
new file mode 100644
index 0000000..56f3498
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -0,0 +1,1413 @@
+//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-pseudo"
+
+static cl::opt<bool>
+VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
+ cl::desc("Verify machine code after expanding ARM pseudos"));
+
+namespace {
+ class ARMExpandPseudo : public MachineFunctionPass {
+ public:
+ static char ID;
+ ARMExpandPseudo() : MachineFunctionPass(ID) {}
+
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ const char *getPassName() const override {
+ return "ARM pseudo instruction expansion pass";
+ }
+
+ private:
+ void TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
+ bool ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ bool ExpandMBB(MachineBasicBlock &MBB);
+ void ExpandVLD(MachineBasicBlock::iterator &MBBI);
+ void ExpandVST(MachineBasicBlock::iterator &MBBI);
+ void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
+ void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt);
+ void ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ };
+ char ARMExpandPseudo::ID = 0;
+}
+
+/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
+/// the instructions created from the expansion.
+void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI,
+ MachineInstrBuilder &DefMI) {
+ const MCInstrDesc &Desc = OldMI.getDesc();
+ for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = OldMI.getOperand(i);
+ assert(MO.isReg() && MO.getReg());
+ if (MO.isUse())
+ UseMI.addOperand(MO);
+ else
+ DefMI.addOperand(MO);
+ }
+}
+
+namespace {
+ // Constants for register spacing in NEON load/store instructions.
+ // For quad-register load-lane and store-lane pseudo instructors, the
+ // spacing is initially assumed to be EvenDblSpc, and that is changed to
+ // OddDblSpc depending on the lane number operand.
+ enum NEONRegSpacing {
+ SingleSpc,
+ EvenDblSpc,
+ OddDblSpc
+ };
+
+ // Entries for NEON load/store information table. The table is sorted by
+ // PseudoOpc for fast binary-search lookups.
+ struct NEONLdStTableEntry {
+ uint16_t PseudoOpc;
+ uint16_t RealOpc;
+ bool IsLoad;
+ bool isUpdating;
+ bool hasWritebackOperand;
+ uint8_t RegSpacing; // One of type NEONRegSpacing
+ uint8_t NumRegs; // D registers loaded or stored
+ uint8_t RegElts; // elements per D register; used for lane ops
+ // FIXME: Temporary flag to denote whether the real instruction takes
+ // a single register (like the encoding) or all of the registers in
+ // the list (like the asm syntax and the isel DAG). When all definitions
+ // are converted to take only the single encoded register, this will
+ // go away.
+ bool copyAllListRegs;
+
+ // Comparison methods for binary search of the table.
+ bool operator<(const NEONLdStTableEntry &TE) const {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
+ return TE.PseudoOpc < PseudoOpc;
+ }
+ friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
+ const NEONLdStTableEntry &TE) {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ };
+}
+
+static const NEONLdStTableEntry NEONLdStTable[] = {
+{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
+{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
+{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true},
+{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true},
+{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true},
+{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
+
+{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false},
+{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
+{ ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false},
+
+{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
+{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
+{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
+{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true},
+{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true},
+{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true},
+{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true},
+{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true},
+{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true},
+{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
+
+{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
+
+{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
+{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
+{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true},
+{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true},
+{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true},
+{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true},
+
+{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true},
+{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
+
+{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
+
+{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true},
+{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true},
+{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
+{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true},
+{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true},
+{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true},
+{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true},
+{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true},
+
+{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true},
+{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true},
+{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true},
+{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true},
+{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true},
+{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true},
+
+{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true},
+{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
+
+{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
+
+{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true},
+{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true},
+{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
+{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true},
+{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true},
+{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true},
+{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true},
+{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true},
+
+{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true},
+{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true},
+{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true},
+{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true},
+{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
+{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
+
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
+
+{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true},
+{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
+{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true},
+{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
+{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true},
+{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
+{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true},
+{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true},
+{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
+{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
+
+{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
+{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true},
+{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true},
+{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true},
+{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true},
+
+{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
+
+{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true},
+{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true},
+{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true},
+{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true},
+{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true},
+{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true},
+{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true},
+{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true},
+
+{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true},
+{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true},
+
+{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+
+{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true},
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true},
+{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true},
+{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true},
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true},
+{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true},
+{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true},
+{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true}
+};
+
+/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
+/// load or store pseudo instruction.
+static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
+#ifndef NDEBUG
+ // Make sure the table is sorted.
+ static bool TableChecked = false;
+ if (!TableChecked) {
+ assert(std::is_sorted(std::begin(NEONLdStTable), std::end(NEONLdStTable)) &&
+ "NEONLdStTable is not sorted!");
+ TableChecked = true;
+ }
+#endif
+
+ auto I = std::lower_bound(std::begin(NEONLdStTable),
+ std::end(NEONLdStTable), Opcode);
+ if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
+ return I;
+ return nullptr;
+}
+
+/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
+/// corresponding to the specified register spacing. Not all of the results
+/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
+static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
+ const TargetRegisterInfo *TRI, unsigned &D0,
+ unsigned &D1, unsigned &D2, unsigned &D3) {
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_4);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing");
+ D0 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_3);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_5);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_7);
+ }
+}
+
+/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VLD instructions with D register operands.
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1 && TableEntry->copyAllListRegs)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2 && TableEntry->copyAllListRegs)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3 && TableEntry->copyAllListRegs)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+
+ if (TableEntry->isUpdating)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->hasWritebackOperand)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // For an instruction writing double-spaced subregs, the pseudo instruction
+ // has an extra operand that is a use of the super-register. Record the
+ // operand index and skip over it.
+ unsigned SrcOpIdx = 0;
+ if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc)
+ SrcOpIdx = OpIdx++;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source operand used for double-spaced subregs over
+ // to the new instruction as an implicit operand.
+ if (SrcOpIdx != 0) {
+ MachineOperand MO = MI.getOperand(SrcOpIdx);
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ }
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+}
+
+/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VST instructions with D register operands.
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ if (TableEntry->isUpdating)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->hasWritebackOperand)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, getUndefRegState(SrcIsUndef));
+ if (NumRegs > 1 && TableEntry->copyAllListRegs)
+ MIB.addReg(D1, getUndefRegState(SrcIsUndef));
+ if (NumRegs > 2 && TableEntry->copyAllListRegs)
+ MIB.addReg(D2, getUndefRegState(SrcIsUndef));
+ if (NumRegs > 3 && TableEntry->copyAllListRegs)
+ MIB.addReg(D3, getUndefRegState(SrcIsUndef));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+ else if (!SrcIsUndef)
+ MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg.
+ TransferImpOps(MI, MIB, MIB);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+}
+
+/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+ unsigned RegElts = TableEntry->RegElts;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ // The lane operand is always the 3rd from last operand, before the 2
+ // predicate operands.
+ unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
+
+ // Adjust the lane and spacing as needed for Q registers.
+ assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
+ if (RegSpc == EvenDblSpc && Lane >= RegElts) {
+ RegSpc = OddDblSpc;
+ Lane -= RegElts;
+ }
+ assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
+
+ unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
+ unsigned DstReg = 0;
+ bool DstIsDead = false;
+ if (TableEntry->IsLoad) {
+ DstIsDead = MI.getOperand(OpIdx).isDead();
+ DstReg = MI.getOperand(OpIdx++).getReg();
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+ }
+
+ if (TableEntry->isUpdating)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->hasWritebackOperand)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Grab the super-register source.
+ MachineOperand MO = MI.getOperand(OpIdx++);
+ if (!TableEntry->IsLoad)
+ GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
+
+ // Add the subregs as sources of the new instruction.
+ unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
+ getKillRegState(MO.isKill()));
+ MIB.addReg(D0, SrcFlags);
+ if (NumRegs > 1)
+ MIB.addReg(D1, SrcFlags);
+ if (NumRegs > 2)
+ MIB.addReg(D2, SrcFlags);
+ if (NumRegs > 3)
+ MIB.addReg(D3, SrcFlags);
+
+ // Add the lane number operand.
+ MIB.addImm(Lane);
+ OpIdx += 1;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source to be an implicit source.
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ if (TableEntry->IsLoad)
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MI.eraseFromParent();
+}
+
+/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+
+ // Transfer the destination register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ if (IsExt)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0);
+
+ // Copy the other source register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add an implicit kill and use for the super-reg.
+ MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+static bool IsAnAddressOperand(const MachineOperand &MO) {
+ // This check is overly conservative. Unless we are certain that the machine
+ // operand is not a symbol reference, we return that it is a symbol reference.
+ // This is important as the load pair may not be split up Windows.
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ case MachineOperand::MO_Immediate:
+ case MachineOperand::MO_CImmediate:
+ case MachineOperand::MO_FPImmediate:
+ return false;
+ case MachineOperand::MO_MachineBasicBlock:
+ return true;
+ case MachineOperand::MO_FrameIndex:
+ return false;
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_BlockAddress:
+ return true;
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut:
+ return false;
+ case MachineOperand::MO_Metadata:
+ case MachineOperand::MO_MCSymbol:
+ return true;
+ case MachineOperand::MO_CFIIndex:
+ return false;
+ }
+ llvm_unreachable("unhandled machine operand type");
+}
+
+void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
+ const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
+ bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
+ MachineInstrBuilder LO16, HI16;
+
+ if (!STI->hasV6T2Ops() &&
+ (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
+ // FIXME Windows CE supports older ARM CPUs
+ assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");
+
+ // Expand into a movi + orr.
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
+ unsigned ImmVal = (unsigned)MO.getImm();
+ unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ LO16 = LO16.addImm(SOImmValV1);
+ HI16 = HI16.addImm(SOImmValV2);
+ LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg).addReg(0);
+ HI16.addImm(Pred).addReg(PredReg).addReg(0);
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+ return;
+ }
+
+ unsigned LO16Opc = 0;
+ unsigned HI16Opc = 0;
+ if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
+ LO16Opc = ARM::t2MOVi16;
+ HI16Opc = ARM::t2MOVTi16;
+ } else {
+ LO16Opc = ARM::MOVi16;
+ HI16Opc = ARM::MOVTi16;
+ }
+
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate: {
+ unsigned Imm = MO.getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ LO16 = LO16.addImm(Lo16);
+ HI16 = HI16.addImm(Hi16);
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ const char *ES = MO.getSymbolName();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16);
+ HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16);
+ break;
+ }
+ default: {
+ const GlobalValue *GV = MO.getGlobal();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+ HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ break;
+ }
+ }
+
+ LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg);
+ HI16.addImm(Pred).addReg(PredReg);
+
+ if (RequiresBundling)
+ finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());
+
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+}
+
+bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ return false;
+
+ case ARM::TCRETURNdi:
+ case ARM::TCRETURNri: {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->isReturn() &&
+ "Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = MBBI->getDebugLoc();
+ const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+ MBB.getParent()->getSubtarget().getInstrInfo());
+
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi) {
+ unsigned TCOpcode =
+ STI->isThumb()
+ ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
+ : ARM::TAILJMPd;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+
+ // Add the default predicate in Thumb mode.
+ if (STI->isThumb())
+ MIB.addImm(ARMCC::AL).addReg(0);
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
+ .addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = std::prev(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+ return true;
+ }
+ case ARM::VMOVScc:
+ case ARM::VMOVDcc: {
+ unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
+ MI.getOperand(1).getReg())
+ .addOperand(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addOperand(MI.getOperand(4));
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MOVCCr:
+ case ARM::MOVCCr: {
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
+ MI.getOperand(1).getReg())
+ .addOperand(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addOperand(MI.getOperand(4))
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCsi: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ (MI.getOperand(1).getReg()))
+ .addOperand(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .addOperand(MI.getOperand(5))
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCsr: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
+ (MI.getOperand(1).getReg()))
+ .addOperand(MI.getOperand(2))
+ .addOperand(MI.getOperand(3))
+ .addImm(MI.getOperand(4).getImm())
+ .addImm(MI.getOperand(5).getImm()) // 'pred'
+ .addOperand(MI.getOperand(6))
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MOVCCi16:
+ case ARM::MOVCCi16: {
+ unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addOperand(MI.getOperand(4));
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MOVCCi:
+ case ARM::MOVCCi: {
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addOperand(MI.getOperand(4))
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MVNCCi:
+ case ARM::MVNCCi: {
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addOperand(MI.getOperand(4))
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MOVCClsl:
+ case ARM::t2MOVCClsr:
+ case ARM::t2MOVCCasr:
+ case ARM::t2MOVCCror: {
+ unsigned NewOpc;
+ switch (Opcode) {
+ case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break;
+ case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break;
+ case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break;
+ case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break;
+ default: llvm_unreachable("unexpeced conditional move");
+ }
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
+ MI.getOperand(1).getReg())
+ .addOperand(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .addOperand(MI.getOperand(5))
+ .addReg(0); // 's' bit
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::Int_eh_sjlj_dispatchsetup: {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const ARMBaseInstrInfo *AII =
+ static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+ // For functions using a base pointer, we rematerialize it (via the frame
+ // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
+ // for us. Otherwise, expand to nothing.
+ if (RI.hasBasePointer(MF)) {
+ int32_t NumBytes = AFI->getFramePtrSpillOffset();
+ unsigned FramePtr = RI.getFrameRegister(MF);
+ assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
+ "base pointer without frame pointer?");
+
+ if (AFI->isThumb2Function()) {
+ emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+ } else if (AFI->isThumbFunction()) {
+ emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, *TII, RI);
+ } else {
+ emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0,
+ *TII);
+ }
+ // If there's dynamic realignment, adjust for it.
+ if (RI.needsStackRealignment(MF)) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ // Emit bic r6, r6, MaxAlign
+ assert(MaxAlign <= 256 && "The BIC instruction cannot encode "
+ "immediates larger than 256 with all lower "
+ "bits set.");
+ unsigned bicOpc = AFI->isThumbFunction() ?
+ ARM::t2BICri : ARM::BICri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ }
+
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag: {
+ // These are just fancy MOVs instructions.
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
+ ARM_AM::lsr : ARM_AM::asr),
+ 1)))
+ .addReg(ARM::CPSR, RegState::Define);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::RRX: {
+ // This encodes as "MOVs Rd, Rm, rrx
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
+ .addReg(0);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tTPsoft:
+ case ARM::TPsoft: {
+ MachineInstrBuilder MIB;
+ if (Opcode == ARM::tTPsoft)
+ MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get( ARM::tBL))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addExternalSymbol("__aeabi_read_tp", 0);
+ else
+ MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get( ARM::BL))
+ .addExternalSymbol("__aeabi_read_tp", 0);
+
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
+ ? ARM::tLDRpci : ARM::t2LDRpci;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ MachineInstrBuilder MIB1 =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(NewLdOpc), DstReg)
+ .addOperand(MI.getOperand(1)));
+ MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::tPICADD))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addOperand(MI.getOperand(2));
+ TransferImpOps(MI, MIB1, MIB2);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::LDRLIT_ga_abs:
+ case ARM::LDRLIT_ga_pcrel:
+ case ARM::LDRLIT_ga_pcrel_ldr:
+ case ARM::tLDRLIT_ga_abs:
+ case ARM::tLDRLIT_ga_pcrel: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ const MachineOperand &MO1 = MI.getOperand(1);
+ const GlobalValue *GV = MO1.getGlobal();
+ bool IsARM =
+ Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs;
+ bool IsPIC =
+ Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs;
+ unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci;
+ unsigned PICAddOpc =
+ IsARM
+ ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
+ : ARM::tPICADD;
+
+ // We need a new const-pool entry to load from.
+ MachineConstantPool *MCP = MBB.getParent()->getConstantPool();
+ unsigned ARMPCLabelIndex = 0;
+ MachineConstantPoolValue *CPV;
+
+ if (IsPIC) {
+ unsigned PCAdj = IsARM ? 8 : 4;
+ ARMPCLabelIndex = AFI->createPICLabelUId();
+ CPV = ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj);
+ } else
+ CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier);
+
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg)
+ .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
+ if (IsARM)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+
+ if (IsPIC) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addImm(ARMPCLabelIndex);
+
+ if (IsARM)
+ AddDefaultPred(MIB);
+ }
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOV_ga_pcrel:
+ case ARM::MOV_ga_pcrel_ldr:
+ case ARM::t2MOV_ga_pcrel: {
+ // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode.
+ unsigned LabelId = AFI->createPICLabelUId();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ const MachineOperand &MO1 = MI.getOperand(1);
+ const GlobalValue *GV = MO1.getGlobal();
+ unsigned TF = MO1.getTargetFlags();
+ bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
+ unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
+ unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
+ unsigned LO16TF = TF | ARMII::MO_LO16;
+ unsigned HI16TF = TF | ARMII::MO_HI16;
+ unsigned PICAddOpc = isARM
+ ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
+ : ARM::tPICADD;
+ MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(LO16Opc), DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
+ .addImm(LabelId);
+
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg)
+ .addReg(DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
+ .addImm(LabelId);
+
+ MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(PICAddOpc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg).addImm(LabelId);
+ if (isARM) {
+ AddDefaultPred(MIB3);
+ if (Opcode == ARM::MOV_ga_pcrel_ldr)
+ MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ }
+ TransferImpOps(MI, MIB1, MIB3);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVi32imm:
+ case ARM::MOVCCi32imm:
+ case ARM::t2MOVi32imm:
+ case ARM::t2MOVCCi32imm:
+ ExpandMOV32BitImm(MBB, MBBI);
+ return true;
+
+ case ARM::SUBS_PC_LR: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
+ .addReg(ARM::LR)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(2))
+ .addReg(ARM::CPSR, RegState::Undef);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::VLDMQIA: {
+ unsigned NewOpc = ARM::VLDMDIA;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register destination.
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the source register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the destination operands (D subregs).
+ unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VSTMQIA: {
+ unsigned NewOpc = ARM::VSTMDIA;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register source.
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the destination register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the source operands (D subregs).
+ unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0)
+ .addReg(D1, SrcIsKill ? RegState::Kill : 0);
+
+ if (SrcIsKill) // Add an implicit kill for the Q register.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+
+ TransferImpOps(MI, MIB, MIB);
+ MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64TPseudoWB_fixed:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD1d64QPseudoWB_fixed:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD3DUPd8Pseudo:
+ case ARM::VLD3DUPd16Pseudo:
+ case ARM::VLD3DUPd32Pseudo:
+ case ARM::VLD3DUPd8Pseudo_UPD:
+ case ARM::VLD3DUPd16Pseudo_UPD:
+ case ARM::VLD3DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ ExpandVLD(MBBI);
+ return true;
+
+ case ARM::VST2q8Pseudo:
+ case ARM::VST2q16Pseudo:
+ case ARM::VST2q32Pseudo:
+ case ARM::VST2q8PseudoWB_fixed:
+ case ARM::VST2q16PseudoWB_fixed:
+ case ARM::VST2q32PseudoWB_fixed:
+ case ARM::VST2q8PseudoWB_register:
+ case ARM::VST2q16PseudoWB_register:
+ case ARM::VST2q32PseudoWB_register:
+ case ARM::VST3d8Pseudo:
+ case ARM::VST3d16Pseudo:
+ case ARM::VST3d32Pseudo:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST3d8Pseudo_UPD:
+ case ARM::VST3d16Pseudo_UPD:
+ case ARM::VST3d32Pseudo_UPD:
+ case ARM::VST1d64TPseudoWB_fixed:
+ case ARM::VST1d64TPseudoWB_register:
+ case ARM::VST3q8Pseudo_UPD:
+ case ARM::VST3q16Pseudo_UPD:
+ case ARM::VST3q32Pseudo_UPD:
+ case ARM::VST3q8oddPseudo:
+ case ARM::VST3q16oddPseudo:
+ case ARM::VST3q32oddPseudo:
+ case ARM::VST3q8oddPseudo_UPD:
+ case ARM::VST3q16oddPseudo_UPD:
+ case ARM::VST3q32oddPseudo_UPD:
+ case ARM::VST4d8Pseudo:
+ case ARM::VST4d16Pseudo:
+ case ARM::VST4d32Pseudo:
+ case ARM::VST1d64QPseudo:
+ case ARM::VST4d8Pseudo_UPD:
+ case ARM::VST4d16Pseudo_UPD:
+ case ARM::VST4d32Pseudo_UPD:
+ case ARM::VST1d64QPseudoWB_fixed:
+ case ARM::VST1d64QPseudoWB_register:
+ case ARM::VST4q8Pseudo_UPD:
+ case ARM::VST4q16Pseudo_UPD:
+ case ARM::VST4q32Pseudo_UPD:
+ case ARM::VST4q8oddPseudo:
+ case ARM::VST4q16oddPseudo:
+ case ARM::VST4q32oddPseudo:
+ case ARM::VST4q8oddPseudo_UPD:
+ case ARM::VST4q16oddPseudo_UPD:
+ case ARM::VST4q32oddPseudo_UPD:
+ ExpandVST(MBBI);
+ return true;
+
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD3LNd8Pseudo:
+ case ARM::VLD3LNd16Pseudo:
+ case ARM::VLD3LNd32Pseudo:
+ case ARM::VLD3LNq16Pseudo:
+ case ARM::VLD3LNq32Pseudo:
+ case ARM::VLD3LNd8Pseudo_UPD:
+ case ARM::VLD3LNd16Pseudo_UPD:
+ case ARM::VLD3LNd32Pseudo_UPD:
+ case ARM::VLD3LNq16Pseudo_UPD:
+ case ARM::VLD3LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ case ARM::VST1LNq8Pseudo:
+ case ARM::VST1LNq16Pseudo:
+ case ARM::VST1LNq32Pseudo:
+ case ARM::VST1LNq8Pseudo_UPD:
+ case ARM::VST1LNq16Pseudo_UPD:
+ case ARM::VST1LNq32Pseudo_UPD:
+ case ARM::VST2LNd8Pseudo:
+ case ARM::VST2LNd16Pseudo:
+ case ARM::VST2LNd32Pseudo:
+ case ARM::VST2LNq16Pseudo:
+ case ARM::VST2LNq32Pseudo:
+ case ARM::VST2LNd8Pseudo_UPD:
+ case ARM::VST2LNd16Pseudo_UPD:
+ case ARM::VST2LNd32Pseudo_UPD:
+ case ARM::VST2LNq16Pseudo_UPD:
+ case ARM::VST2LNq32Pseudo_UPD:
+ case ARM::VST3LNd8Pseudo:
+ case ARM::VST3LNd16Pseudo:
+ case ARM::VST3LNd32Pseudo:
+ case ARM::VST3LNq16Pseudo:
+ case ARM::VST3LNq32Pseudo:
+ case ARM::VST3LNd8Pseudo_UPD:
+ case ARM::VST3LNd16Pseudo_UPD:
+ case ARM::VST3LNd32Pseudo_UPD:
+ case ARM::VST3LNq16Pseudo_UPD:
+ case ARM::VST3LNq32Pseudo_UPD:
+ case ARM::VST4LNd8Pseudo:
+ case ARM::VST4LNd16Pseudo:
+ case ARM::VST4LNd32Pseudo:
+ case ARM::VST4LNq16Pseudo:
+ case ARM::VST4LNq32Pseudo:
+ case ARM::VST4LNd8Pseudo_UPD:
+ case ARM::VST4LNd16Pseudo_UPD:
+ case ARM::VST4LNd32Pseudo_UPD:
+ case ARM::VST4LNq16Pseudo_UPD:
+ case ARM::VST4LNq32Pseudo_UPD:
+ ExpandLaneOp(MBBI);
+ return true;
+
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
+ }
+}
+
+bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= ExpandMI(MBB, MBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
+ AFI = MF.getInfo<ARMFunctionInfo>();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+ ++MFI)
+ Modified |= ExpandMBB(*MFI);
+ if (VerifyARMPseudo)
+ MF.verify(this, "After expanding ARM pseudo instructions.");
+ return Modified;
+}
+
+/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
+/// expansion pass.
+FunctionPass *llvm::createARMExpandPseudoPass() {
+ return new ARMExpandPseudo();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
new file mode 100644
index 0000000..9bdf823c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -0,0 +1,3070 @@
+//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// ARMGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMCallingConv.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+
+ // All possible address modes, plus some.
+ typedef struct Address {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
+
+ int Offset;
+
+ // Innocuous defaults for our address.
+ Address()
+ : BaseType(RegBase), Offset(0) {
+ Base.Reg = 0;
+ }
+ } Address;
+
+class ARMFastISel final : public FastISel {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+ Module &M;
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ ARMFunctionInfo *AFI;
+
+ // Convenience variables to avoid some queries.
+ bool isThumb2;
+ LLVMContext *Context;
+
+ public:
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo),
+ Subtarget(
+ &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
+ M(const_cast<Module &>(*funcInfo.Fn->getParent())),
+ TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
+ TLI(*Subtarget->getTargetLowering()) {
+ AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
+ isThumb2 = AFI->isThumbFunction();
+ Context = &funcInfo.Fn->getContext();
+ }
+
+ // Code from FastISel.cpp.
+ private:
+ unsigned fastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned fastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill);
+ unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ unsigned fastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ unsigned fastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+
+ // Backend specific FastISel code.
+ private:
+ bool fastSelectInstruction(const Instruction *I) override;
+ unsigned fastMaterializeConstant(const Constant *C) override;
+ unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
+ bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) override;
+ bool fastLowerArguments() override;
+ private:
+ #include "ARMGenFastISel.inc"
+
+ // Instruction selection routines.
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectIndirectBr(const Instruction *I);
+ bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectIToFP(const Instruction *I, bool isSigned);
+ bool SelectFPToI(const Instruction *I, bool isSigned);
+ bool SelectDiv(const Instruction *I, bool isSigned);
+ bool SelectRem(const Instruction *I, bool isSigned);
+ bool SelectCall(const Instruction *I, const char *IntrMemName);
+ bool SelectIntrinsicCall(const IntrinsicInst &I);
+ bool SelectSelect(const Instruction *I);
+ bool SelectRet(const Instruction *I);
+ bool SelectTrunc(const Instruction *I);
+ bool SelectIntExt(const Instruction *I);
+ bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt);
+ bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment = 0, bool isZExt = true,
+ bool allocReg = true);
+ bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment = 0);
+ bool ARMComputeAddress(const Value *Obj, Address &Addr);
+ void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
+ bool ARMIsMemCpySmall(uint64_t Len);
+ bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
+ unsigned Alignment);
+ unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
+ unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned ARMMaterializeInt(const Constant *C, MVT VT);
+ unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
+ unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
+ unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
+ unsigned ARMSelectCallOp(bool UseReg);
+ unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
+
+ const TargetLowering *getTargetLowering() { return &TLI; }
+
+ // Call handling routines.
+ private:
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg);
+ bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool isVarArg);
+ unsigned getLibcallReg(const Twine &Name);
+ bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool isVarArg);
+ bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
+
+ // OptionalDef handling routines.
+ private:
+ bool isARMNEONPred(const MachineInstr *MI);
+ bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
+ const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+ void AddLoadStoreOperands(MVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags, bool useAM3);
+};
+
+} // end anonymous namespace
+
+#include "ARMGenCallingConv.inc"
+
+// DefinesOptionalPredicate - This is different from DefinesPredicate in that
+// we don't care about implicit defs here, just places we'll need to add a
+// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
+bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
+ if (!MI->hasOptionalDef())
+ return false;
+
+ // Look to see if our OptionalDef is defining CPSR or CCR.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ if (MO.getReg() == ARM::CPSR)
+ *CPSR = true;
+ }
+ return true;
+}
+
+bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
+ if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+ AFI->isThumb2Function())
+ return MI->isPredicable();
+
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+ if (MCID.OpInfo[i].isPredicate())
+ return true;
+
+ return false;
+}
+
+// If the machine is predicable go ahead and add the predicate operands, if
+// it needs default CC operands add those.
+// TODO: If we want to support thumb1 then we'll need to deal with optional
+// CPSR defs that need to be added before the remaining operands. See s_cc_out
+// for descriptions why.
+const MachineInstrBuilder &
+ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
+ MachineInstr *MI = &*MIB;
+
+ // Do we use a predicate? or...
+ // Are we NEON in ARM mode and have a predicate operand? If so, I know
+ // we're not predicable but add it anyways.
+ if (isARMNEONPred(MI))
+ AddDefaultPred(MIB);
+
+ // Do we optionally set a predicate? Preds is size > 0 iff the predicate
+ // defines CPSR. All other OptionalDefines in ARM are the CCR register.
+ bool CPSR = false;
+ if (DefinesOptionalPredicate(MI, &CPSR)) {
+ if (CPSR)
+ AddDefaultT1CC(MIB);
+ else
+ AddDefaultCC(MIB);
+ }
+ return MIB;
+}
+
+unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ // Make sure the input operand is sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
+ ResultReg).addReg(Op0, Op0IsKill * RegState::Kill));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ // Make sure the input operands are sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ Op1 = constrainOperandRegClass(II, Op1, 2);
+
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ // Make sure the input operands are sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ Op1 = constrainOperandRegClass(II, Op1, 2);
+ Op2 = constrainOperandRegClass(II, Op1, 3);
+
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ // Make sure the input operand is sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ // Make sure the input operands are sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ Op1 = constrainOperandRegClass(II, Op1, 2);
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
+ ResultReg).addImm(Imm));
+ } else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+// TODO: Don't worry about 64-bit now, but when this is fixed remove the
+// checks from the various callers.
+unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
+ if (VT == MVT::f64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
+ if (VT == MVT::i64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+// For double width floating point we need to materialize two constants
+// (the high and the low) into integer registers then use a move to get
+// the combined constant into an FP reg.
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
+ const APFloat Val = CFP->getValueAPF();
+ bool is64bit = VT == MVT::f64;
+
+ // This checks to see if we can use VFP3 instructions to materialize
+ // a constant, otherwise we have to go through the constant pool.
+ if (TLI.isFPImmLegal(Val, VT)) {
+ int Imm;
+ unsigned Opc;
+ if (is64bit) {
+ Imm = ARM_AM::getFP64Imm(Val);
+ Opc = ARM::FCONSTD;
+ } else {
+ Imm = ARM_AM::getFP32Imm(Val);
+ Opc = ARM::FCONSTS;
+ }
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), DestReg).addImm(Imm));
+ return DestReg;
+ }
+
+ // Require VFP2 for loading fp constants.
+ if (!Subtarget->hasVFP2()) return false;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = DL.getTypeAllocSize(CFP->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
+
+ // The extra reg is for addrmode5.
+ AddOptionalDefs(
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx)
+ .addReg(0));
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
+
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
+ return 0;
+
+ // If we can do this in a single instruction without a constant pool entry
+ // do so now.
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
+ unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
+ &ARM::GPRRegClass;
+ unsigned ImmReg = createResultReg(RC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ImmReg)
+ .addImm(CI->getZExtValue()));
+ return ImmReg;
+ }
+
+ // Use MVN to emit negative constants.
+ if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
+ unsigned Imm = (unsigned)~(CI->getSExtValue());
+ bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ if (UseImm) {
+ unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
+ &ARM::GPRRegClass;
+ unsigned ImmReg = createResultReg(RC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ImmReg)
+ .addImm(Imm));
+ return ImmReg;
+ }
+ }
+
+ unsigned ResultReg = 0;
+ if (Subtarget->useMovt(*FuncInfo.MF))
+ ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+
+ if (ResultReg)
+ return ResultReg;
+
+ // Load from constant pool. For now 32-bit only.
+ if (VT != MVT::i32)
+ return 0;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = DL.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = DL.getTypeAllocSize(C->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+ ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb2)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::t2LDRpci), ResultReg)
+ .addConstantPoolIndex(Idx));
+ else {
+ // The extra immediate is for addrmode2.
+ ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::LDRcp), ResultReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
+ // For now 32-bit only.
+ if (VT != MVT::i32) return 0;
+
+ Reloc::Model RelocM = TM.getRelocationModel();
+ bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
+ : &ARM::GPRRegClass;
+ unsigned DestReg = createResultReg(RC);
+
+ // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ bool IsThreadLocal = GVar && GVar->isThreadLocal();
+ if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;
+
+ // Use movw+movt when possible, it avoids constant pool entries.
+ // Non-darwin targets only support static movt relocations in FastISel.
+ if (Subtarget->useMovt(*FuncInfo.MF) &&
+ (Subtarget->isTargetMachO() || RelocM == Reloc::Static)) {
+ unsigned Opc;
+ unsigned char TF = 0;
+ if (Subtarget->isTargetMachO())
+ TF = ARMII::MO_NONLAZY;
+
+ switch (RelocM) {
+ case Reloc::PIC_:
+ Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
+ break;
+ default:
+ Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
+ break;
+ }
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
+ } else {
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = DL.getPrefTypeAlignment(GV->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = DL.getTypeAllocSize(GV->getType());
+ }
+
+ if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
+ return ARMLowerPICELF(GV, Align, VT);
+
+ // Grab index.
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
+ (Subtarget->isThumb() ? 4 : 8);
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
+ ARMCP::CPValue,
+ PCAdj);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ // Load value.
+ MachineInstrBuilder MIB;
+ if (isThumb2) {
+ unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ DestReg).addConstantPoolIndex(Idx);
+ if (RelocM == Reloc::PIC_)
+ MIB.addImm(Id);
+ AddOptionalDefs(MIB);
+ } else {
+ // The extra immediate is for addrmode2.
+ DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::LDRcp), DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0);
+ AddOptionalDefs(MIB);
+
+ if (RelocM == Reloc::PIC_) {
+ unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DbgLoc, TII.get(Opc), NewDestReg)
+ .addReg(DestReg)
+ .addImm(Id);
+ AddOptionalDefs(MIB);
+ return NewDestReg;
+ }
+ }
+ }
+
+ if (IsIndirect) {
+ MachineInstrBuilder MIB;
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb2)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::t2LDRi12), NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ else
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::LDRi12), NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ DestReg = NewDestReg;
+ AddOptionalDefs(MIB);
+ }
+
+ return DestReg;
+}
+
+unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
+ EVT CEVT = TLI.getValueType(DL, C->getType(), true);
+
+ // Only handle simple types.
+ if (!CEVT.isSimple()) return 0;
+ MVT VT = CEVT.getSimpleVT();
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return ARMMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return ARMMaterializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return ARMMaterializeInt(C, VT);
+
+ return 0;
+}
+
+// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
+
+unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ // This will get lowered later into the correct offsets and registers
+ // via rewriteXFrameIndex.
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(SI->second)
+ .addImm(0));
+ return ResultReg;
+ }
+
+ return 0;
+}
+
+bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
+ EVT evt = TLI.getValueType(DL, Ty, true);
+
+ // Only handle simple types.
+ if (evt == MVT::Other || !evt.isSimple()) return false;
+ VT = evt.getSimpleVT();
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+ // If this is a type than can be sign or zero-extended to a basic operation
+ // go ahead and accept it now.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
+// Computes the address to get to an object.
+bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
+ // Some boilerplate from the X86 FastISel.
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast:
+ // Look through bitcasts.
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ int TmpOffset = Addr.Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ break;
+ }
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
+ }
+ }
+
+ // Try to grab the base operand now.
+ Addr.Offset = TmpOffset;
+ if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
+
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // Try to get this in a register if nothing else has worked.
+ if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
+ return Addr.Base.Reg != 0;
+}
+
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
+ bool needsLowering = false;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unhandled load/store type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ if (!useAM3) {
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ // Handle negative offsets.
+ if (needsLowering && isThumb2)
+ needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
+ Addr.Offset > -256);
+ } else {
+ // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
+ needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ // Floating point operands handle 8-bit offsets.
+ needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
+ break;
+ }
+
+ // If this is a stack pointer and the offset needs to be simplified then
+ // put the alloca address into a register, set the base type back to
+ // register and continue. This should almost never happen.
+ if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(Addr.Base.FI)
+ .addImm(0));
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
+ // Since the offset is too large for the load/store instruction
+ // get the reg+offset into a register.
+ if (needsLowering) {
+ Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
+ /*Op0IsKill*/false, Addr.Offset, MVT::i32);
+ Addr.Offset = 0;
+ }
+}
+
+void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags, bool useAM3) {
+ // addrmode5 output depends on the selection dag addressing dividing the
+ // offset by 4 that it then later multiplies. Do this here as well.
+ if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
+ Addr.Offset /= 4;
+
+ // Frame base works a bit differently. Handle it separately.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ int FI = Addr.Base.FI;
+ int Offset = Addr.Offset;
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ // Now add the rest of the operands.
+ MIB.addFrameIndex(FI);
+
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
+ if (useAM3) {
+ signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
+ MIB.addReg(0);
+ MIB.addImm(Imm);
+ } else {
+ MIB.addImm(Addr.Offset);
+ }
+ MIB.addMemOperand(MMO);
+ } else {
+ // Now add the rest of the operands.
+ MIB.addReg(Addr.Base.Reg);
+
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
+ if (useAM3) {
+ signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
+ MIB.addReg(0);
+ MIB.addImm(Imm);
+ } else {
+ MIB.addImm(Addr.Offset);
+ }
+ }
+ AddOptionalDefs(MIB);
+}
+
+bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment, bool isZExt, bool allocReg) {
+ unsigned Opc;
+ bool useAM3 = false;
+ bool needVMOV = false;
+ const TargetRegisterClass *RC;
+ switch (VT.SimpleTy) {
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
+ else
+ Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
+ } else {
+ if (isZExt) {
+ Opc = ARM::LDRBi12;
+ } else {
+ Opc = ARM::LDRSB;
+ useAM3 = true;
+ }
+ }
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
+ break;
+ case MVT::i16:
+ if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+ return false;
+
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
+ else
+ Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
+ } else {
+ Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
+ useAM3 = true;
+ }
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
+ break;
+ case MVT::i32:
+ if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+ return false;
+
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = ARM::t2LDRi8;
+ else
+ Opc = ARM::t2LDRi12;
+ } else {
+ Opc = ARM::LDRi12;
+ }
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ // Unaligned loads need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ needVMOV = true;
+ VT = MVT::i32;
+ Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
+ } else {
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ }
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned loads need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
+ Opc = ARM::VLDRD;
+ RC = TLI.getRegClassFor(VT);
+ break;
+ }
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT, useAM3);
+
+ // Create the base instruction, then add the operands.
+ if (allocReg)
+ ResultReg = createResultReg(RC);
+ assert (ResultReg > 255 && "Expected an allocated virtual register.");
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
+
+ // If we had an unaligned load of a float we've converted it to an regular
+ // load. Now we must move from the GRP to the FP register.
+ if (needVMOV) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(ResultReg));
+ ResultReg = MoveReg;
+ }
+ return true;
+}
+
+bool ARMFastISel::SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
+
+ unsigned ResultReg;
+ if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ return false;
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment) {
+ unsigned StrOpc;
+ bool useAM3 = false;
+ switch (VT.SimpleTy) {
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i1: {
+ unsigned Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass);
+ unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
+ SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), Res)
+ .addReg(SrcReg).addImm(1));
+ SrcReg = Res;
+ } // Fallthrough here.
+ case MVT::i8:
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRBi8;
+ else
+ StrOpc = ARM::t2STRBi12;
+ } else {
+ StrOpc = ARM::STRBi12;
+ }
+ break;
+ case MVT::i16:
+ if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+ return false;
+
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRHi8;
+ else
+ StrOpc = ARM::t2STRHi12;
+ } else {
+ StrOpc = ARM::STRH;
+ useAM3 = true;
+ }
+ break;
+ case MVT::i32:
+ if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+ return false;
+
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRi8;
+ else
+ StrOpc = ARM::t2STRi12;
+ } else {
+ StrOpc = ARM::STRi12;
+ }
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ // Unaligned stores need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ SrcReg = MoveReg;
+ VT = MVT::i32;
+ StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
+ } else {
+ StrOpc = ARM::VSTRS;
+ }
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned stores need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
+ StrOpc = ARM::VSTRD;
+ break;
+ }
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT, useAM3);
+
+ // Create the base instruction, then add the operands.
+ SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(StrOpc))
+ .addReg(SrcReg);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
+ return true;
+}
+
+bool ARMFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // Atomic stores need special handling.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0) return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(1), Addr))
+ return false;
+
+ if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ return false;
+ return true;
+}
+
+static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // Needs two compares...
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UEQ:
+ default:
+ // AL is our "false" for now. The other two need more compares.
+ return ARMCC::AL;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::FCMP_OEQ:
+ return ARMCC::EQ;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::FCMP_OGT:
+ return ARMCC::GT;
+ case CmpInst::ICMP_SGE:
+ case CmpInst::FCMP_OGE:
+ return ARMCC::GE;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::FCMP_UGT:
+ return ARMCC::HI;
+ case CmpInst::FCMP_OLT:
+ return ARMCC::MI;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::FCMP_OLE:
+ return ARMCC::LS;
+ case CmpInst::FCMP_ORD:
+ return ARMCC::VC;
+ case CmpInst::FCMP_UNO:
+ return ARMCC::VS;
+ case CmpInst::FCMP_UGE:
+ return ARMCC::PL;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::FCMP_ULT:
+ return ARMCC::LT;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::FCMP_ULE:
+ return ARMCC::LE;
+ case CmpInst::FCMP_UNE:
+ case CmpInst::ICMP_NE:
+ return ARMCC::NE;
+ case CmpInst::ICMP_UGE:
+ return ARMCC::HS;
+ case CmpInst::ICMP_ULT:
+ return ARMCC::LO;
+ }
+}
+
+bool ARMFastISel::SelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Simple branch support.
+
+ // If we can, avoid recomputing the compare - redoing it could lead to wonky
+ // behavior.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
+
+ // Get the compare predicate.
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ ARMCC::CondCodes ARMPred = getComparePred(Predicate);
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ // Emit the compare.
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
+
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
+ finishCondBranch(BI->getParent(), TBB, FBB);
+ return true;
+ }
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TstOpc))
+ .addReg(OpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
+
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+
+ finishCondBranch(BI->getParent(), TBB, FBB);
+ return true;
+ }
+ } else if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(BI->getCondition())) {
+ uint64_t Imm = CI->getZExtValue();
+ MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+ fastEmitBranch(Target, DbgLoc);
+ return true;
+ }
+
+ unsigned CmpReg = getRegForValue(BI->getCondition());
+ if (CmpReg == 0) return false;
+
+ // We've been divorced from our compare! Our block was split, and
+ // now our compare lives in a predecessor block. We musn't
+ // re-compare here, as the children of the compare aren't guaranteed
+ // live across the block boundary (we *could* check for this).
+ // Regardless, the compare has been done in the predecessor block,
+ // and it left a value for us in a virtual register. Ergo, we test
+ // the one-bit value left in the virtual register.
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
+ CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
+ AddOptionalDefs(
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
+ .addReg(CmpReg)
+ .addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
+
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+ finishCondBranch(BI->getParent(), TBB, FBB);
+ return true;
+}
+
+bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
+ unsigned AddrReg = getRegForValue(I->getOperand(0));
+ if (AddrReg == 0) return false;
+
+ unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc)).addReg(AddrReg));
+
+ const IndirectBrInst *IB = cast<IndirectBrInst>(I);
+ for (const BasicBlock *SuccBB : IB->successors())
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
+
+ return true;
+}
+
+bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt) {
+ Type *Ty = Src1Value->getType();
+ EVT SrcEVT = TLI.getValueType(DL, Ty, true);
+ if (!SrcEVT.isSimple()) return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
+
+ bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ // Check to see if the 2nd operand is a constant that we can encode directly
+ // in the compare.
+ int Imm = 0;
+ bool UseImm = false;
+ bool isNegativeImm = false;
+ // FIXME: At -O0 we don't have anything that canonicalizes operand order.
+ // Thus, Src1Value may be a ConstantInt, but we're missing it.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
+ if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
+ SrcVT == MVT::i1) {
+ const APInt &CIVal = ConstInt->getValue();
+ Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
+ // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
+ // then a cmn, because there is no way to represent 2147483648 as a
+ // signed 32-bit int.
+ if (Imm < 0 && Imm != (int)0x80000000) {
+ isNegativeImm = true;
+ Imm = -Imm;
+ }
+ UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ }
+ } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
+ if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
+ if (ConstFP->isZero() && !ConstFP->isNegative())
+ UseImm = true;
+ }
+
+ unsigned CmpOpc;
+ bool isICmp = true;
+ bool needsExt = false;
+ switch (SrcVT.SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ isICmp = false;
+ CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
+ break;
+ case MVT::f64:
+ isICmp = false;
+ CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
+ break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ needsExt = true;
+ // Intentional fall-through.
+ case MVT::i32:
+ if (isThumb2) {
+ if (!UseImm)
+ CmpOpc = ARM::t2CMPrr;
+ else
+ CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
+ } else {
+ if (!UseImm)
+ CmpOpc = ARM::CMPrr;
+ else
+ CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
+ }
+ break;
+ }
+
+ unsigned SrcReg1 = getRegForValue(Src1Value);
+ if (SrcReg1 == 0) return false;
+
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(Src2Value);
+ if (SrcReg2 == 0) return false;
+ }
+
+ // We have i1, i8, or i16, we need to either zero extend or sign extend.
+ if (needsExt) {
+ SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
+ if (SrcReg1 == 0) return false;
+ if (!UseImm) {
+ SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
+ if (SrcReg2 == 0) return false;
+ }
+ }
+
+ const MCInstrDesc &II = TII.get(CmpOpc);
+ SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
+ if (!UseImm) {
+ SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(SrcReg1).addReg(SrcReg2));
+ } else {
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(SrcReg1);
+
+ // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
+ if (isICmp)
+ MIB.addImm(Imm);
+ AddOptionalDefs(MIB);
+ }
+
+ // For floating point we need to move the result to a comparison register
+ // that we can then use for branches.
+ if (Ty->isFloatTy() || Ty->isDoubleTy())
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::FMSTAT)));
+ return true;
+}
+
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ // Emit the compare.
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
+
+ // Now set a register based on the comparison. Explicitly set the predicates
+ // here.
+ unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
+ : &ARM::GPRRegClass;
+ unsigned DestReg = createResultReg(RC);
+ Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+ unsigned ZeroReg = fastMaterializeConstant(Zero);
+ // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg)
+ .addReg(ZeroReg).addImm(1)
+ .addImm(ARMPred).addReg(ARM::CPSR);
+
+ updateValueMap(I, DestReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPExt(const Instruction *I) {
+ // Make sure we have VFP and that we're extending float to double.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!I->getType()->isDoubleTy() ||
+ !V->getType()->isFloatTy()) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(&ARM::DPRRegClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::VCVTDS), Result)
+ .addReg(Op));
+ updateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
+ // Make sure we have VFP and that we're truncating double to float.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!(I->getType()->isFloatTy() &&
+ V->getType()->isDoubleTy())) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(&ARM::SPRRegClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::VCVTSD), Result)
+ .addReg(Op));
+ updateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, DstVT))
+ return false;
+
+ Value *Src = I->getOperand(0);
+ EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
+ if (!SrcEVT.isSimple())
+ return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0) return false;
+
+ // Handle sign-extension.
+ if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
+ SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
+ /*isZExt*/!isSigned);
+ if (SrcReg == 0) return false;
+ }
+
+ // The conversion routine works on fp-reg to fp-reg and the operand above
+ // was an integer, move it to the fp registers if possible.
+ unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
+ if (FP == 0) return false;
+
+ unsigned Opc;
+ if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
+ else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
+ else return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg).addReg(FP));
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ Type *RetTy = I->getType();
+ if (!isTypeLegal(RetTy, DstVT))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ unsigned Opc;
+ Type *OpTy = I->getOperand(0)->getType();
+ if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
+ else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
+ else return false;
+
+ // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg).addReg(Op));
+
+ // This result needs to be in an integer register, but the conversion only
+ // takes place in fp-regs.
+ unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
+ if (IntReg == 0) return false;
+
+ updateValueMap(I, IntReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSelect(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ // Things need to be register sized for register moves.
+ if (VT != MVT::i32) return false;
+
+ unsigned CondReg = getRegForValue(I->getOperand(0));
+ if (CondReg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+
+ // Check to see if we can use an immediate in the conditional move.
+ int Imm = 0;
+ bool UseImm = false;
+ bool isNegativeImm = false;
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
+ assert (VT == MVT::i32 && "Expecting an i32.");
+ Imm = (int)ConstInt->getValue().getZExtValue();
+ if (Imm < 0) {
+ isNegativeImm = true;
+ Imm = ~Imm;
+ }
+ UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ }
+
+ unsigned Op2Reg = 0;
+ if (!UseImm) {
+ Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+ }
+
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
+ CondReg = constrainOperandRegClass(TII.get(TstOpc), CondReg, 0);
+ AddOptionalDefs(
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
+ .addReg(CondReg)
+ .addImm(1));
+
+ unsigned MovCCOpc;
+ const TargetRegisterClass *RC;
+ if (!UseImm) {
+ RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
+ MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
+ } else {
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
+ if (!isNegativeImm)
+ MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
+ else
+ MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
+ }
+ unsigned ResultReg = createResultReg(RC);
+ if (!UseImm) {
+ Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
+ Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
+ ResultReg)
+ .addReg(Op2Reg)
+ .addReg(Op1Reg)
+ .addImm(ARMCC::NE)
+ .addReg(ARM::CPSR);
+ } else {
+ Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
+ ResultReg)
+ .addReg(Op1Reg)
+ .addImm(Imm)
+ .addImm(ARMCC::EQ)
+ .addReg(ARM::CPSR);
+ }
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
+ MVT VT;
+ Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ // If we have integer div support we should have selected this automagically.
+ // In case we have a real miss go ahead and return false and we'll pick
+ // it up later.
+ if (Subtarget->hasDivide()) return false;
+
+ // Otherwise emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
+ else if (VT == MVT::i16)
+ LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
+ else if (VT == MVT::i32)
+ LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
+ else if (VT == MVT::i64)
+ LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
+ else if (VT == MVT::i128)
+ LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
+ MVT VT;
+ Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
+ else if (VT == MVT::i16)
+ LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
+ else if (VT == MVT::i32)
+ LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
+ else if (VT == MVT::i64)
+ LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
+ else if (VT == MVT::i128)
+ LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
+
+ // We can get here in the case when we have a binary operation on a non-legal
+ // type and the target independent selector doesn't know how to handle it.
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned Opc;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::ADD:
+ Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
+ break;
+ case ISD::OR:
+ Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
+ break;
+ case ISD::SUB:
+ Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
+ break;
+ }
+
+ unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+ if (SrcReg1 == 0) return false;
+
+ // TODO: Often the 2nd operand is an immediate, which can be encoded directly
+ // in the instruction, rather then materializing the value in a register.
+ unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+ if (SrcReg2 == 0) return false;
+
+ unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
+ SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
+ SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addReg(SrcReg2));
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT FPVT = TLI.getValueType(DL, I->getType(), true);
+ if (!FPVT.isSimple()) return false;
+ MVT VT = FPVT.getSimpleVT();
+
+ // FIXME: Support vector types where possible.
+ if (VT.isVector())
+ return false;
+
+ // We can get here in the case when we want to use NEON for our fp
+ // operations, but can't figure out how to. Just use the vfp instructions
+ // if we have them.
+ // FIXME: It'd be nice to use NEON instructions.
+ Type *Ty = I->getType();
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned Opc;
+ bool is64bit = VT == MVT::f64 || VT == MVT::i64;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::FADD:
+ Opc = is64bit ? ARM::VADDD : ARM::VADDS;
+ break;
+ case ISD::FSUB:
+ Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
+ break;
+ case ISD::FMUL:
+ Opc = is64bit ? ARM::VMULD : ARM::VMULS;
+ break;
+ }
+ unsigned Op1 = getRegForValue(I->getOperand(0));
+ if (Op1 == 0) return false;
+
+ unsigned Op2 = getRegForValue(I->getOperand(1));
+ if (Op2 == 0) return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg)
+ .addReg(Op1).addReg(Op2));
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+// Call Handling Code
+
+// This is largely taken directly from CCAssignFnForNode
+// TODO: We may not support all of this.
+CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ if (Subtarget->hasVFP2() && !isVarArg) {
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+ // For AAPCS ABI targets, just use VFP variant of the calling convention.
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ }
+ // Fallthrough
+ case CallingConv::C:
+ // Use target triple & subtarget features to do actual dispatch.
+ if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() &&
+ TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ else
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ } else {
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ }
+ case CallingConv::ARM_AAPCS_VFP:
+ if (!isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ // Fall through to soft float variant, variadic functions don't
+ // use hard floating point ABI.
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::GHC:
+ if (Return)
+ llvm_unreachable("Can't return in GHC call convention");
+ else
+ return CC_ARM_APCS_GHC;
+ }
+}
+
+bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool isVarArg) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
+ CCAssignFnForCall(CC, false, isVarArg));
+
+ // Check that we can handle all of the arguments. If we can't, then bail out
+ // now before we add code to the MBB.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ return false;
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ continue;
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64 ||
+ // TODO: Only handle register args for now.
+ !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
+ return false;
+ } else {
+ switch (ArgVT.SimpleTy) {
+ default:
+ return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ }
+ }
+ }
+
+ // At the point, we are able to handle the call's arguments in fast isel.
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AdjStackDown))
+ .addImm(NumBytes));
+
+ // Process the args.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ const Value *ArgVal = Args[VA.getValNo()];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
+ "We don't handle NEON/vector parameters yet.");
+
+ // Handle arg promotion, etc.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ MVT DestVT = VA.getLocVT();
+ Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
+ assert (Arg != 0 && "Failed to emit a sext");
+ ArgVT = DestVT;
+ break;
+ }
+ case CCValAssign::AExt:
+ // Intentional fall-through. Handle AExt and ZExt.
+ case CCValAssign::ZExt: {
+ MVT DestVT = VA.getLocVT();
+ Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
+ assert (Arg != 0 && "Failed to emit a zext");
+ ArgVT = DestVT;
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
+ /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ default: llvm_unreachable("Unknown arg promotion!");
+ }
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
+ RegArgs.push_back(VA.getLocReg());
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ assert(VA.getLocVT() == MVT::f64 &&
+ "Custom lowering for v2f64 args not available");
+
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+ "We only handle register args!");
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::VMOVRRD), VA.getLocReg())
+ .addReg(NextVA.getLocReg(), RegState::Define)
+ .addReg(Arg));
+ RegArgs.push_back(VA.getLocReg());
+ RegArgs.push_back(NextVA.getLocReg());
+ } else {
+ assert(VA.isMemLoc());
+ // Need to store on the stack.
+
+ // Don't emit stores for undef values.
+ if (isa<UndefValue>(ArgVal))
+ continue;
+
+ Address Addr;
+ Addr.BaseType = Address::RegBase;
+ Addr.Base.Reg = ARM::SP;
+ Addr.Offset = VA.getLocMemOffset();
+
+ bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
+ assert(EmitRet && "Could not emit a store for argument!");
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool isVarArg) {
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(0));
+
+ // Now the return value.
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
+
+ // Copy all of the result registers out of their specified physreg.
+ if (RVLocs.size() == 2 && RetVT == MVT::f64) {
+ // For this move we copy into two registers and then move into the
+ // double fp reg we want.
+ MVT DestVT = RVLocs[0].getValVT();
+ const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
+ unsigned ResultReg = createResultReg(DstRC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::VMOVDRR), ResultReg)
+ .addReg(RVLocs[0].getLocReg())
+ .addReg(RVLocs[1].getLocReg()));
+
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[1].getLocReg());
+
+ // Finally update the result.
+ updateValueMap(I, ResultReg);
+ } else {
+ assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
+ MVT CopyVT = RVLocs[0].getValVT();
+
+ // Special handling for extended integers.
+ if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
+ CopyVT = MVT::i32;
+
+ const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+
+ unsigned ResultReg = createResultReg(DstRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+
+ // Finally update the result.
+ updateValueMap(I, ResultReg);
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
+ F.isVarArg()));
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+
+ unsigned SrcReg = Reg + VA.getValNo();
+ EVT RVEVT = TLI.getValueType(DL, RV->getType());
+ if (!RVEVT.isSimple()) return false;
+ MVT RVVT = RVEVT.getSimpleVT();
+ MVT DestVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (RVVT != DestVT) {
+ if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
+ return false;
+
+ assert(DestVT == MVT::i32 && "ARM should always ext to i32");
+
+ // Perform extension if flagged as either zext or sext. Otherwise, do
+ // nothing.
+ if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
+ SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
+ if (SrcReg == 0) return false;
+ }
+ }
+
+ // Make the copy.
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
+
+ // Add register to return instruction.
+ RetRegs.push_back(VA.getLocReg());
+ }
+
+ unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(RetOpc));
+ AddOptionalDefs(MIB);
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
+ return true;
+}
+
+unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
+ if (UseReg)
+ return isThumb2 ? ARM::tBLXr : ARM::BLX;
+ else
+ return isThumb2 ? ARM::tBL : ARM::BL;
+}
+
+unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
+ // Manually compute the global's type to avoid building it when unnecessary.
+ Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
+ EVT LCREVT = TLI.getValueType(DL, GVTy);
+ if (!LCREVT.isSimple()) return 0;
+
+ GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
+ GlobalValue::ExternalLinkage, nullptr,
+ Name);
+ assert(GV->getType() == GVTy && "We miscomputed the type for the global!");
+ return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
+}
+
+// A quick function that will emit a call for a named libcall in F with the
+// vector of passed arguments for the Instruction in I. We can assume that we
+// can emit a call for any libcall we can produce. This is an abridged version
+// of the full call infrastructure since we won't need to worry about things
+// like computed function pointers or strange arguments at call sites.
+// TODO: Try to unify this and the normal call bits for ARM, then try to unify
+// with X86.
+bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
+ CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
+
+ // Handle *simple* calls for now.
+ Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // Can't handle non-double multi-reg retvals.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
+ if (RVLocs.size() >= 2 && RetVT != MVT::f64)
+ return false;
+ }
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(I->getNumOperands());
+ ArgRegs.reserve(I->getNumOperands());
+ ArgVTs.reserve(I->getNumOperands());
+ ArgFlags.reserve(I->getNumOperands());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value *Op = I->getOperand(i);
+ unsigned Arg = getRegForValue(Op);
+ if (Arg == 0) return false;
+
+ Type *ArgTy = Op->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT)) return false;
+
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(Op);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, false))
+ return false;
+
+ unsigned CalleeReg = 0;
+ if (Subtarget->genLongCalls()) {
+ CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
+ if (CalleeReg == 0) return false;
+ }
+
+ // Issue the call.
+ unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DbgLoc, TII.get(CallOpc));
+ // BL / BLX don't take a predicate, but tBL / tBLX do.
+ if (isThumb2)
+ AddDefaultPred(MIB);
+ if (Subtarget->genLongCalls())
+ MIB.addReg(CalleeReg);
+ else
+ MIB.addExternalSymbol(TLI.getLibcallName(Call));
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i], RegState::Implicit);
+
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+bool ARMFastISel::SelectCall(const Instruction *I,
+ const char *IntrMemName = nullptr) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm.
+ if (isa<InlineAsm>(Callee)) return false;
+
+ // Allow SelectionDAG isel to handle tail calls.
+ if (CI->isTailCall()) return false;
+
+ // Check the calling convention.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+
+ // TODO: Avoid some calling conventions?
+
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ bool isVarArg = FTy->isVarArg();
+
+ // Handle *simple* calls for now.
+ Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
+ RetVT != MVT::i8 && RetVT != MVT::i1)
+ return false;
+
+ // Can't handle non-double multi-reg retvals.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
+ RetVT != MVT::i16 && RetVT != MVT::i32) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
+ if (RVLocs.size() >= 2 && RetVT != MVT::f64)
+ return false;
+ }
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ unsigned arg_size = CS.arg_size();
+ Args.reserve(arg_size);
+ ArgRegs.reserve(arg_size);
+ ArgVTs.reserve(arg_size);
+ ArgFlags.reserve(arg_size);
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ // If we're lowering a memory intrinsic instead of a regular call, skip the
+ // last two arguments, which shouldn't be passed to the underlying function.
+ if (IntrMemName && e-i <= 2)
+ break;
+
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ // FIXME: Only handle *easy* calls for now.
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ return false;
+
+ Type *ArgTy = (*i)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
+ ArgVT != MVT::i1)
+ return false;
+
+ unsigned Arg = getRegForValue(*i);
+ if (Arg == 0)
+ return false;
+
+ unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(*i);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, isVarArg))
+ return false;
+
+ bool UseReg = false;
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV || Subtarget->genLongCalls()) UseReg = true;
+
+ unsigned CalleeReg = 0;
+ if (UseReg) {
+ if (IntrMemName)
+ CalleeReg = getLibcallReg(IntrMemName);
+ else
+ CalleeReg = getRegForValue(Callee);
+
+ if (CalleeReg == 0) return false;
+ }
+
+ // Issue the call.
+ unsigned CallOpc = ARMSelectCallOp(UseReg);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DbgLoc, TII.get(CallOpc));
+
+ unsigned char OpFlags = 0;
+
+ // Add MO_PLT for global address or external symbol in the PIC relocation
+ // model.
+ if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+
+ // ARM calls don't take a predicate, but tBL / tBLX do.
+ if(isThumb2)
+ AddDefaultPred(MIB);
+ if (UseReg)
+ MIB.addReg(CalleeReg);
+ else if (!IntrMemName)
+ MIB.addGlobalAddress(GV, 0, OpFlags);
+ else
+ MIB.addExternalSymbol(IntrMemName, OpFlags);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i], RegState::Implicit);
+
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
+ return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
+ return Len <= 16;
+}
+
+bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
+ uint64_t Len, unsigned Alignment) {
+ // Make sure we don't bloat code by inlining very large memcpy's.
+ if (!ARMIsMemCpySmall(Len))
+ return false;
+
+ while (Len) {
+ MVT VT;
+ if (!Alignment || Alignment >= 4) {
+ if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert (Len == 1 && "Expected a length of 1!");
+ VT = MVT::i8;
+ }
+ } else {
+ // Bound based on alignment.
+ if (Len >= 2 && Alignment == 2)
+ VT = MVT::i16;
+ else {
+ VT = MVT::i8;
+ }
+ }
+
+ bool RV;
+ unsigned ResultReg;
+ RV = ARMEmitLoad(VT, ResultReg, Src);
+ assert (RV == true && "Should be able to handle this load.");
+ RV = ARMEmitStore(VT, ResultReg, Dest);
+ assert (RV == true && "Should be able to handle this store.");
+ (void)RV;
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ Dest.Offset += Size;
+ Src.Offset += Size;
+ }
+
+ return true;
+}
+
+bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
+ // FIXME: Handle more intrinsics.
+ switch (I.getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::frameaddress: {
+ MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
+
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
+ unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
+ unsigned SrcReg = FramePtr;
+
+ // Recursively load frame address
+ // ldr r0 [fp]
+ // ldr r0 [r0]
+ // ldr r0 [r0]
+ // ...
+ unsigned DestReg;
+ unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
+ while (Depth--) {
+ DestReg = createResultReg(RC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(LdrOpc), DestReg)
+ .addReg(SrcReg).addImm(0));
+ SrcReg = DestReg;
+ }
+ updateValueMap(&I, SrcReg);
+ return true;
+ }
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ const MemTransferInst &MTI = cast<MemTransferInst>(I);
+ // Don't handle volatile.
+ if (MTI.isVolatile())
+ return false;
+
+ // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
+ // we would emit dead code because we don't currently handle memmoves.
+ bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
+ if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
+ // Small memcpy's are common enough that we want to do them without a call
+ // if possible.
+ uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
+ if (ARMIsMemCpySmall(Len)) {
+ Address Dest, Src;
+ if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
+ !ARMComputeAddress(MTI.getRawSource(), Src))
+ return false;
+ unsigned Alignment = MTI.getAlignment();
+ if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
+ return true;
+ }
+ }
+
+ if (!MTI.getLength()->getType()->isIntegerTy(32))
+ return false;
+
+ if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
+ return false;
+
+ const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
+ return SelectCall(&I, IntrMemName);
+ }
+ case Intrinsic::memset: {
+ const MemSetInst &MSI = cast<MemSetInst>(I);
+ // Don't handle volatile.
+ if (MSI.isVolatile())
+ return false;
+
+ if (!MSI.getLength()->getType()->isIntegerTy(32))
+ return false;
+
+ if (MSI.getDestAddressSpace() > 255)
+ return false;
+
+ return SelectCall(&I, "memset");
+ }
+ case Intrinsic::trap: {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(
+ Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
+ return true;
+ }
+ }
+}
+
+bool ARMFastISel::SelectTrunc(const Instruction *I) {
+ // The high bits for a type smaller than the register size are assumed to be
+ // undefined.
+ Value *Op = I->getOperand(0);
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(DL, Op->getType(), true);
+ DestVT = TLI.getValueType(DL, I->getType(), true);
+
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+ return false;
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg) return false;
+
+ // Because the high bits are undefined, a truncate doesn't generate
+ // any code.
+ updateValueMap(I, SrcReg);
+ return true;
+}
+
+unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ bool isZExt) {
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return 0;
+ if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
+ return 0;
+
+ // Table of which combinations can be emitted as a single instruction,
+ // and which will require two.
+ static const uint8_t isSingleInstrTbl[3][2][2][2] = {
+ // ARM Thumb
+ // !hasV6Ops hasV6Ops !hasV6Ops hasV6Ops
+ // ext: s z s z s z s z
+ /* 1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
+ /* 8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
+ /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
+ };
+
+ // Target registers for:
+ // - For ARM can never be PC.
+ // - For 16-bit Thumb are restricted to lower 8 registers.
+ // - For 32-bit Thumb are restricted to non-SP and non-PC.
+ static const TargetRegisterClass *RCTbl[2][2] = {
+ // Instructions: Two Single
+ /* ARM */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
+ /* Thumb */ { &ARM::tGPRRegClass, &ARM::rGPRRegClass }
+ };
+
+ // Table governing the instruction(s) to be emitted.
+ static const struct InstructionTable {
+ uint32_t Opc : 16;
+ uint32_t hasS : 1; // Some instructions have an S bit, always set it to 0.
+ uint32_t Shift : 7; // For shift operand addressing mode, used by MOVsi.
+ uint32_t Imm : 8; // All instructions have either a shift or a mask.
+ } IT[2][2][3][2] = {
+ { // Two instructions (first is left shift, second is in this table).
+ { // ARM Opc S Shift Imm
+ /* 1 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 31 },
+ /* 1 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 31 } },
+ /* 8 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 24 },
+ /* 8 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 24 } },
+ /* 16 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 16 },
+ /* 16 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 16 } }
+ },
+ { // Thumb Opc S Shift Imm
+ /* 1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 },
+ /* 1 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } },
+ /* 8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 },
+ /* 8 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } },
+ /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 },
+ /* 16 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } }
+ }
+ },
+ { // Single instruction.
+ { // ARM Opc S Shift Imm
+ /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 },
+ /* 1 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 1 } },
+ /* 8 bit sext */ { { ARM::SXTB , 0, ARM_AM::no_shift, 0 },
+ /* 8 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 255 } },
+ /* 16 bit sext */ { { ARM::SXTH , 0, ARM_AM::no_shift, 0 },
+ /* 16 bit zext */ { ARM::UXTH , 0, ARM_AM::no_shift, 0 } }
+ },
+ { // Thumb Opc S Shift Imm
+ /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 },
+ /* 1 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 1 } },
+ /* 8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift, 0 },
+ /* 8 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
+ /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift, 0 },
+ /* 16 bit zext */ { ARM::t2UXTH , 0, ARM_AM::no_shift, 0 } }
+ }
+ }
+ };
+
+ unsigned SrcBits = SrcVT.getSizeInBits();
+ unsigned DestBits = DestVT.getSizeInBits();
+ (void) DestBits;
+ assert((SrcBits < DestBits) && "can only extend to larger types");
+ assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
+ "other sizes unimplemented");
+ assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
+ "other sizes unimplemented");
+
+ bool hasV6Ops = Subtarget->hasV6Ops();
+ unsigned Bitness = SrcBits / 8; // {1,8,16}=>{0,1,2}
+ assert((Bitness < 3) && "sanity-check table bounds");
+
+ bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
+ const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
+ const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
+ unsigned Opc = ITP->Opc;
+ assert(ARM::KILL != Opc && "Invalid table entry");
+ unsigned hasS = ITP->hasS;
+ ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
+ assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
+ "only MOVsi has shift operand addressing mode");
+ unsigned Imm = ITP->Imm;
+
+ // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
+ bool setsCPSR = &ARM::tGPRRegClass == RC;
+ unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
+ unsigned ResultReg;
+ // MOVsi encodes shift and immediate in shift operand addressing mode.
+ // The following condition has the same value when emitting two
+ // instruction sequences: both are shifts.
+ bool ImmIsSO = (Shift != ARM_AM::no_shift);
+
+ // Either one or two instructions are emitted.
+ // They're always of the form:
+ // dst = in OP imm
+ // CPSR is set only by 16-bit Thumb instructions.
+ // Predicate, if any, is AL.
+ // S bit, if available, is always 0.
+ // When two are emitted the first's result will feed as the second's input,
+ // that value is then dead.
+ unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
+ for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
+ ResultReg = createResultReg(RC);
+ bool isLsl = (0 == Instr) && !isSingleInstr;
+ unsigned Opcode = isLsl ? LSLOpc : Opc;
+ ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
+ unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
+ bool isKill = 1 == Instr;
+ MachineInstrBuilder MIB = BuildMI(
+ *FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode), ResultReg);
+ if (setsCPSR)
+ MIB.addReg(ARM::CPSR, RegState::Define);
+ SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
+ AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc));
+ if (hasS)
+ AddDefaultCC(MIB);
+ // Second instruction consumes the first's result.
+ SrcReg = ResultReg;
+ }
+
+ return ResultReg;
+}
+
+bool ARMFastISel::SelectIntExt(const Instruction *I) {
+ // On ARM, in general, integer casts don't involve legal types; this code
+ // handles promotable integers.
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ bool isZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg) return false;
+
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(DL, SrcTy, true);
+ DestEVT = TLI.getValueType(DL, DestTy, true);
+ if (!SrcEVT.isSimple()) return false;
+ if (!DestEVT.isSimple()) return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
+ unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
+ if (ResultReg == 0) return false;
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectShift(const Instruction *I,
+ ARM_AM::ShiftOpc ShiftTy) {
+ // We handle thumb2 mode by target independent selector
+ // or SelectionDAG ISel.
+ if (isThumb2)
+ return false;
+
+ // Only handle i32 now.
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
+ if (DestVT != MVT::i32)
+ return false;
+
+ unsigned Opc = ARM::MOVsr;
+ unsigned ShiftImm;
+ Value *Src2Value = I->getOperand(1);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
+ ShiftImm = CI->getZExtValue();
+
+ // Fall back to selection DAG isel if the shift amount
+ // is zero or greater than the width of the value type.
+ if (ShiftImm == 0 || ShiftImm >=32)
+ return false;
+
+ Opc = ARM::MOVsi;
+ }
+
+ Value *Src1Value = I->getOperand(0);
+ unsigned Reg1 = getRegForValue(Src1Value);
+ if (Reg1 == 0) return false;
+
+ unsigned Reg2 = 0;
+ if (Opc == ARM::MOVsr) {
+ Reg2 = getRegForValue(Src2Value);
+ if (Reg2 == 0) return false;
+ }
+
+ unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
+ if(ResultReg == 0) return false;
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg1);
+
+ if (Opc == ARM::MOVsi)
+ MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
+ else if (Opc == ARM::MOVsr) {
+ MIB.addReg(Reg2);
+ MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
+ }
+
+ AddOptionalDefs(MIB);
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+// TODO: SoftFP support.
+bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return SelectLoad(I);
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Br:
+ return SelectBranch(I);
+ case Instruction::IndirectBr:
+ return SelectIndirectBr(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return SelectCmp(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectIToFP(I, /*isSigned*/ true);
+ case Instruction::UIToFP:
+ return SelectIToFP(I, /*isSigned*/ false);
+ case Instruction::FPToSI:
+ return SelectFPToI(I, /*isSigned*/ true);
+ case Instruction::FPToUI:
+ return SelectFPToI(I, /*isSigned*/ false);
+ case Instruction::Add:
+ return SelectBinaryIntOp(I, ISD::ADD);
+ case Instruction::Or:
+ return SelectBinaryIntOp(I, ISD::OR);
+ case Instruction::Sub:
+ return SelectBinaryIntOp(I, ISD::SUB);
+ case Instruction::FAdd:
+ return SelectBinaryFPOp(I, ISD::FADD);
+ case Instruction::FSub:
+ return SelectBinaryFPOp(I, ISD::FSUB);
+ case Instruction::FMul:
+ return SelectBinaryFPOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectDiv(I, /*isSigned*/ true);
+ case Instruction::UDiv:
+ return SelectDiv(I, /*isSigned*/ false);
+ case Instruction::SRem:
+ return SelectRem(I, /*isSigned*/ true);
+ case Instruction::URem:
+ return SelectRem(I, /*isSigned*/ false);
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return SelectIntrinsicCall(*II);
+ return SelectCall(I);
+ case Instruction::Select:
+ return SelectSelect(I);
+ case Instruction::Ret:
+ return SelectRet(I);
+ case Instruction::Trunc:
+ return SelectTrunc(I);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return SelectIntExt(I);
+ case Instruction::Shl:
+ return SelectShift(I, ARM_AM::lsl);
+ case Instruction::LShr:
+ return SelectShift(I, ARM_AM::lsr);
+ case Instruction::AShr:
+ return SelectShift(I, ARM_AM::asr);
+ default: break;
+ }
+ return false;
+}
+
+namespace {
+// This table describes sign- and zero-extend instructions which can be
+// folded into a preceding load. All of these extends have an immediate
+// (sometimes a mask and sometimes a shift) that's applied after
+// extension.
+const struct FoldableLoadExtendsStruct {
+ uint16_t Opc[2]; // ARM, Thumb.
+ uint8_t ExpectedImm;
+ uint8_t isZExt : 1;
+ uint8_t ExpectedVT : 7;
+} FoldableLoadExtends[] = {
+ { { ARM::SXTH, ARM::t2SXTH }, 0, 0, MVT::i16 },
+ { { ARM::UXTH, ARM::t2UXTH }, 0, 1, MVT::i16 },
+ { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8 },
+ { { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 },
+ { { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 }
+};
+}
+
+/// \brief The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// successful.
+bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(LI->getType(), VT))
+ return false;
+
+ // Combine load followed by zero- or sign-extend.
+ // ldrb r1, [r0] ldrb r1, [r0]
+ // uxtb r2, r1 =>
+ // mov r3, r2 mov r3, r1
+ if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
+ return false;
+ const uint64_t Imm = MI->getOperand(2).getImm();
+
+ bool Found = false;
+ bool isZExt;
+ for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends);
+ i != e; ++i) {
+ if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() &&
+ (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm &&
+ MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) {
+ Found = true;
+ isZExt = FoldableLoadExtends[i].isZExt;
+ }
+ }
+ if (!Found) return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
+
+ unsigned ResultReg = MI->getOperand(0).getReg();
+ if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
+ return false;
+ MI->eraseFromParent();
+ return true;
+}
+
+unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
+ unsigned Align, MVT VT) {
+ bool UseGOT_PREL =
+ !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
+
+ LLVMContext *Context = &MF->getFunction()->getContext();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
+ GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
+ UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
+ /*AddCurrentAddress=*/UseGOT_PREL);
+
+ unsigned ConstAlign =
+ MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
+ unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
+
+ unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
+ unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
+ .addConstantPoolIndex(Idx);
+ if (Opc == ARM::LDRcp)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+
+ // Fix the address by adding pc.
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
+ : ARM::PICADD;
+ DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addReg(TempReg)
+ .addImm(ARMPCLabelIndex);
+ if (!Subtarget->isThumb())
+ AddDefaultPred(MIB);
+
+ if (UseGOT_PREL && Subtarget->isThumb()) {
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::t2LDRi12), NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ DestReg = NewDestReg;
+ AddOptionalDefs(MIB);
+ }
+ return DestReg;
+}
+
+bool ARMFastISel::fastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ switch (CC) {
+ default:
+ return false;
+ case CallingConv::Fast:
+ case CallingConv::C:
+ case CallingConv::ARM_AAPCS_VFP:
+ case CallingConv::ARM_AAPCS:
+ case CallingConv::ARM_APCS:
+ break;
+ }
+
+ // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
+ // which are passed in r0 - r3.
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (Idx > 4)
+ return false;
+
+ if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
+ return false;
+
+ Type *ArgTy = I->getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(DL, ArgTy);
+ if (!ArgVT.isSimple()) return false;
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ default:
+ return false;
+ }
+ }
+
+
+ static const MCPhysReg GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ const TargetRegisterClass *RC = &ARM::rGPRRegClass;
+ Idx = 0;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ unsigned SrcReg = GPRArgRegs[Idx];
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(DstReg, getKillRegState(true));
+ updateValueMap(&*I, ResultReg);
+ }
+
+ return true;
+}
+
+namespace llvm {
+ FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
+ if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
+ return new ARMFastISel(funcInfo, libInfo);
+
+ return nullptr;
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFeatures.h b/contrib/llvm/lib/Target/ARM/ARMFeatures.h
new file mode 100644
index 0000000..0c910ab
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFeatures.h
@@ -0,0 +1,97 @@
+//===-- ARMFeatures.h - Checks for ARM instruction features -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the code shared between ARM CodeGen and ARM MC
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMFEATURES_H
+#define LLVM_LIB_TARGET_ARM_ARMFEATURES_H
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+
+namespace llvm {
+
+template<typename InstrType> // could be MachineInstr or MCInst
+bool IsCPSRDead(InstrType *Instr);
+
+template<typename InstrType> // could be MachineInstr or MCInst
+inline bool isV8EligibleForIT(InstrType *Instr) {
+ switch (Instr->getOpcode()) {
+ default:
+ return false;
+ case ARM::tADC:
+ case ARM::tADDi3:
+ case ARM::tADDi8:
+ case ARM::tADDrr:
+ case ARM::tAND:
+ case ARM::tASRri:
+ case ARM::tASRrr:
+ case ARM::tBIC:
+ case ARM::tEOR:
+ case ARM::tLSLri:
+ case ARM::tLSLrr:
+ case ARM::tLSRri:
+ case ARM::tLSRrr:
+ case ARM::tMOVi8:
+ case ARM::tMUL:
+ case ARM::tMVN:
+ case ARM::tORR:
+ case ARM::tROR:
+ case ARM::tRSB:
+ case ARM::tSBC:
+ case ARM::tSUBi3:
+ case ARM::tSUBi8:
+ case ARM::tSUBrr:
+ // Outside of an IT block, these set CPSR.
+ return IsCPSRDead(Instr);
+ case ARM::tADDrSPi:
+ case ARM::tCMNz:
+ case ARM::tCMPi8:
+ case ARM::tCMPr:
+ case ARM::tLDRBi:
+ case ARM::tLDRBr:
+ case ARM::tLDRHi:
+ case ARM::tLDRHr:
+ case ARM::tLDRSB:
+ case ARM::tLDRSH:
+ case ARM::tLDRi:
+ case ARM::tLDRr:
+ case ARM::tLDRspi:
+ case ARM::tSTRBi:
+ case ARM::tSTRBr:
+ case ARM::tSTRHi:
+ case ARM::tSTRHr:
+ case ARM::tSTRi:
+ case ARM::tSTRr:
+ case ARM::tSTRspi:
+ case ARM::tTST:
+ return true;
+// there are some "conditionally deprecated" opcodes
+ case ARM::tADDspr:
+ case ARM::tBLXr:
+ return Instr->getOperand(2).getReg() != ARM::PC;
+ // ADD PC, SP and BLX PC were always unpredictable,
+ // now on top of it they're deprecated
+ case ARM::tADDrSP:
+ case ARM::tBX:
+ return Instr->getOperand(0).getReg() != ARM::PC;
+ case ARM::tADDhirr:
+ return Instr->getOperand(0).getReg() != ARM::PC &&
+ Instr->getOperand(2).getReg() != ARM::PC;
+ case ARM::tCMPhir:
+ case ARM::tMOVr:
+ return Instr->getOperand(0).getReg() != ARM::PC &&
+ Instr->getOperand(1).getReg() != ARM::PC;
+ }
+}
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
new file mode 100644
index 0000000..c5990bb
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -0,0 +1,2168 @@
+//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMFrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
+ cl::desc("Align ARM NEON spills in prolog and epilog"));
+
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs);
+
+ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+ STI(sti) {}
+
+bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
+ // iOS always has a FP for backtracking, force other targets to keep their FP
+ // when doing FastISel. The emitted code is currently superior, and in cases
+ // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
+ return TargetFrameLowering::noFramePointerElim(MF) ||
+ MF.getSubtarget<ARMSubtarget>().useFastISel();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+
+ // iOS requires FP not to be clobbered for backtracing purpose.
+ if (STI.isTargetIOS() || STI.isTargetWatchOS())
+ return true;
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Always eliminate non-leaf frame pointers.
+ return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ MFI->hasCalls()) ||
+ RegInfo->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+/// not required, we reserve argument space for call sites in the function
+/// immediately on entry to the current function. This eliminates the need for
+/// add/sub sp brackets around call sites. Returns true if the call frame is
+/// included as part of the stack frame.
+bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has small immediate offset to
+ // address the stack frame. So a large call frame can cause poor codegen
+ // and may even makes it impossible to scavenge a register.
+ if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
+/// call frame pseudos can be simplified. Unlike most targets, having a FP
+/// is not sufficient here since we still may reference some objects via SP
+/// even when FP is available in Thumb2 mode.
+bool
+ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
+ return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static bool isCSRestore(MachineInstr *MI,
+ const ARMBaseInstrInfo &TII,
+ const MCPhysReg *CSRegs) {
+ // Integer spill area is handled with "pop".
+ if (isPopOpcode(MI->getOpcode())) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ if ((MI->getOpcode() == ARM::LDR_POST_IMM ||
+ MI->getOpcode() == ARM::LDR_POST_REG ||
+ MI->getOpcode() == ARM::t2LDR_POST) &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
+ MI->getOperand(1).getReg() == ARM::SP)
+ return true;
+
+ return false;
+}
+
+static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ const ARMBaseInstrInfo &TII, unsigned DestReg,
+ unsigned SrcReg, int NumBytes,
+ unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
+ Pred, PredReg, TII, MIFlags);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
+ Pred, PredReg, TII, MIFlags);
+}
+
+static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ const ARMBaseInstrInfo &TII, int NumBytes,
+ unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) {
+ emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
+ MIFlags, Pred, PredReg);
+}
+
+static int sizeOfSPAdjustment(const MachineInstr *MI) {
+ int RegSize;
+ switch (MI->getOpcode()) {
+ case ARM::VSTMDDB_UPD:
+ RegSize = 8;
+ break;
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ RegSize = 4;
+ break;
+ case ARM::t2STR_PRE:
+ case ARM::STR_PRE_IMM:
+ return 4;
+ default:
+ llvm_unreachable("Unknown push or pop like instruction");
+ }
+
+ int count = 0;
+ // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
+ // pred) so the list starts at 4.
+ for (int i = MI->getNumOperands() - 1; i >= 4; --i)
+ count += RegSize;
+ return count;
+}
+
+static bool WindowsRequiresStackProbe(const MachineFunction &MF,
+ size_t StackSizeInBytes) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackProbeSize = (MFI->getStackProtectorIndex() > 0) ? 4080 : 4096;
+ if (F->hasFnAttribute("stack-probe-size"))
+ F->getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
+ return StackSizeInBytes >= StackProbeSize;
+}
+
+namespace {
+struct StackAdjustingInsts {
+ struct InstInfo {
+ MachineBasicBlock::iterator I;
+ unsigned SPAdjust;
+ bool BeforeFPSet;
+ };
+
+ SmallVector<InstInfo, 4> Insts;
+
+ void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
+ bool BeforeFPSet = false) {
+ InstInfo Info = {I, SPAdjust, BeforeFPSet};
+ Insts.push_back(Info);
+ }
+
+ void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
+ auto Info = std::find_if(Insts.begin(), Insts.end(),
+ [&](InstInfo &Info) { return Info.I == I; });
+ assert(Info != Insts.end() && "invalid sp adjusting instruction");
+ Info->SPAdjust += ExtraBytes;
+ }
+
+ void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB,
+ DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) {
+ unsigned CFAOffset = 0;
+ for (auto &Info : Insts) {
+ if (HasFP && !Info.BeforeFPSet)
+ return;
+
+ CFAOffset -= Info.SPAdjust;
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
+ BuildMI(MBB, std::next(Info.I), dl,
+ TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+};
+}
+
+/// Emit an instruction sequence that will align the address in
+/// register Reg by zero-ing out the lower bits. For versions of the
+/// architecture that support Neon, this must be done in a single
+/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
+/// single instruction. That function only gets called when optimizing
+/// spilling of D registers on a core with the Neon instruction set
+/// present.
+static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
+ const TargetInstrInfo &TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, const unsigned Reg,
+ const unsigned Alignment,
+ const bool MustBeSingleInstruction) {
+ const ARMSubtarget &AST =
+ static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
+ const unsigned AlignMask = Alignment - 1;
+ const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+ assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
+ if (!AFI->isThumbFunction()) {
+ // if the BFC instruction is available, use that to zero the lower
+ // bits:
+ // bfc Reg, #0, log2(Alignment)
+ // otherwise use BIC, if the mask to zero the required number of bits
+ // can be encoded in the bic immediate field
+ // bic Reg, Reg, Alignment-1
+ // otherwise, emit
+ // lsr Reg, Reg, log2(Alignment)
+ // lsl Reg, Reg, log2(Alignment)
+ if (CanUseBFC) {
+ AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(~AlignMask));
+ } else if (AlignMask <= 255) {
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(AlignMask)));
+ } else {
+ assert(!MustBeSingleInstruction &&
+ "Shouldn't call emitAligningInstructions demanding a single "
+ "instruction to be emitted for large stack alignment for a target "
+ "without BFC.");
+ AddDefaultCC(AddDefaultPred(
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))));
+ AddDefaultCC(AddDefaultPred(
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))));
+ }
+ } else {
+ // Since this is only reached for Thumb-2 targets, the BFC instruction
+ // should always be available.
+ assert(CanUseBFC);
+ AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(~AlignMask));
+ }
+}
+
+void ARMFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineModuleInfo &MMI = MF.getMMI();
+ MCContext &Context = MMI.getContext();
+ const TargetMachine &TM = MF.getTarget();
+ const MCRegisterInfo *MRI = Context.getRegisterInfo();
+ const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
+ const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitPrologue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+ unsigned Align = STI.getFrameLowering()->getStackAlignment();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Determine the sizes of each callee-save spill areas and record which frame
+ // belongs to which callee-save spill areas.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+ int D8SpillFI = 0;
+
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
+ StackAdjustingInsts DefCFAOffsetCandidates;
+ bool HasFP = hasFP(MF);
+
+ // Allocate the vararg register save area.
+ if (ArgRegsSaveSize) {
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
+ MachineInstr::FrameSetup);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
+ }
+
+ if (!AFI->hasStackFrame() &&
+ (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
+ if (NumBytes - ArgRegsSaveSize != 0) {
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
+ MachineInstr::FrameSetup);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI),
+ NumBytes - ArgRegsSaveSize, true);
+ }
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
+ return;
+ }
+
+ // Determine spill area sizes.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12:
+ if (STI.isTargetDarwin()) {
+ GPRCS2Size += 4;
+ break;
+ }
+ // fallthrough
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ GPRCS1Size += 4;
+ break;
+ default:
+ // This is a DPR. Exclude the aligned DPRCS2 spills.
+ if (Reg == ARM::D8)
+ D8SpillFI = FI;
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
+ DPRCSSize += 8;
+ }
+ }
+
+ // Move past area 1.
+ MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
+ if (GPRCS1Size > 0) {
+ GPRCS1Push = LastPush = MBBI++;
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
+ }
+
+ // Determine starting offsets of spill areas.
+ unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
+ unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
+ unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
+ unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
+ unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
+ int FramePtrOffsetInPush = 0;
+ if (HasFP) {
+ FramePtrOffsetInPush =
+ MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
+ }
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+ // Move past area 2.
+ if (GPRCS2Size > 0) {
+ GPRCS2Push = LastPush = MBBI++;
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
+ }
+
+ // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
+ // .cfi_offset operations will reflect that.
+ if (DPRGapSize) {
+ assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
+ if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize))
+ DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
+ else {
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
+ MachineInstr::FrameSetup);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
+ }
+ }
+
+ // Move past area 3.
+ if (DPRCSSize > 0) {
+ // Since vpush register list cannot have gaps, there may be multiple vpush
+ // instructions in the prologue.
+ while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
+ DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI));
+ LastPush = MBBI++;
+ }
+ }
+
+ // Move past the aligned DPRCS2 area.
+ if (AFI->getNumAlignedDPRCS2Regs() > 0) {
+ MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
+ // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
+ // leaves the stack pointer pointing to the DPRCS2 area.
+ //
+ // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
+ NumBytes += MFI->getObjectOffset(D8SpillFI);
+ } else
+ NumBytes = DPRCSOffset;
+
+ if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
+ uint32_t NumWords = NumBytes >> 2;
+
+ if (NumWords < 65536)
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup));
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ switch (TM.getCodeModel()) {
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Default:
+ case CodeModel::Kernel:
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ case CodeModel::Large:
+ case CodeModel::JITDefault:
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
+ .addExternalSymbol("__chkstk")
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(ARM::R12, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
+
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr),
+ ARM::SP)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)));
+ NumBytes = 0;
+ }
+
+ if (NumBytes) {
+ // Adjust SP after all the callee-save spills.
+ if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
+ tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
+ DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
+ else {
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
+ }
+
+ if (HasFP && isARM)
+ // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
+ // Note it's not safe to do this in Thumb2 mode because it would have
+ // taken two instructions:
+ // mov sp, r7
+ // sub sp, #24
+ // If an interrupt is taken between the two instructions, then sp is in
+ // an inconsistent state (pointing to the middle of callee-saved area).
+ // The interrupt handler can end up clobbering the registers.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For iOS, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not iOS, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it
+ // is in area one and the adjustment needs to take place just after
+ // that push.
+ if (HasFP) {
+ MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
+ unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push);
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
+ dl, TII, FramePtr, ARM::SP,
+ PushSize + FramePtrOffsetInPush,
+ MachineInstr::FrameSetup);
+ if (FramePtrOffsetInPush + PushSize != 0) {
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true),
+ -(ArgRegsSaveSize - FramePtrOffsetInPush)));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ } else {
+ unsigned CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true)));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+
+ // Now that the prologue's actual instructions are finalised, we can insert
+ // the necessary DWARF cf instructions to describe the situation. Start by
+ // recording where each register ended up:
+ if (GPRCS1Size > 0) {
+ MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
+ int CFIIndex;
+ for (const auto &Entry : CSI) {
+ unsigned Reg = Entry.getReg();
+ int FI = Entry.getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12:
+ if (STI.isTargetDarwin())
+ break;
+ // fallthrough
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
+ }
+ }
+
+ if (GPRCS2Size > 0) {
+ MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
+ for (const auto &Entry : CSI) {
+ unsigned Reg = Entry.getReg();
+ int FI = Entry.getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12:
+ if (STI.isTargetDarwin()) {
+ unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
+ unsigned Offset = MFI->getObjectOffset(FI);
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ break;
+ }
+ }
+ }
+
+ if (DPRCSSize > 0) {
+ // Since vpush register list cannot have gaps, there may be multiple vpush
+ // instructions in the prologue.
+ MachineBasicBlock::iterator Pos = std::next(LastPush);
+ for (const auto &Entry : CSI) {
+ unsigned Reg = Entry.getReg();
+ int FI = Entry.getFrameIdx();
+ if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
+ (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
+ unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
+ unsigned Offset = MFI->getObjectOffset(FI);
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+ }
+
+ // Now we can emit descriptions of where the canonical frame address was
+ // throughout the process. If we have a frame pointer, it takes over the job
+ // half-way through, so only the first few .cfi_def_cfa_offset instructions
+ // actually get emitted.
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
+
+ if (STI.isTargetELF() && hasFP(MF))
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedGapSize(DPRGapSize);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need dynamic stack realignment, do it here. Be paranoid and make
+ // sure if we also have VLAs, we have a base pointer for frame access.
+ // If aligned NEON registers were spilled, the stack has already been
+ // realigned.
+ if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert(!AFI->isThumb1OnlyFunction());
+ if (!AFI->isThumbFunction()) {
+ emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
+ false);
+ } else {
+ // We cannot use sp as source/dest register here, thus we're using r4 to
+ // perform the calculations. We're emitting the following sequence:
+ // mov r4, sp
+ // -- use emitAligningInstructions to produce best sequence to zero
+ // -- out lower bits in r4
+ // mov sp, r4
+ // FIXME: It will be better just to find spare register here.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill));
+ emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
+ false);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill));
+ }
+
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ // FIXME: Clarify FrameSetup flags here.
+ if (RegInfo->hasBasePointer(MF)) {
+ if (isARM)
+ BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::MOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ RegInfo->getBaseRegister())
+ .addReg(ARM::SP));
+ }
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
+
+void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitEpilogue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
+ // First put ourselves on the first (from top) terminator instructions.
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes - ArgRegsSaveSize != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ if (MBBI != MBB.begin()) {
+ do {
+ --MBBI;
+ } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+ if (!isCSRestore(MBBI, TII, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (ArgRegsSaveSize +
+ AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedGapSize() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot or target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else {
+ // It's not possible to restore SP from FP in a single instruction.
+ // For iOS, this looks like:
+ // mov sp, r7
+ // sub sp, #24
+ // This is bad, if an interrupt is taken after the mov, sp is in an
+ // inconsistent state.
+ // Use the first callee-saved register as a scratch register.
+ assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(ARM::R4));
+ }
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(FramePtr));
+ }
+ } else if (NumBytes &&
+ !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes))
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+
+ // Increment past our save areas.
+ if (AFI->getDPRCalleeSavedAreaSize()) {
+ MBBI++;
+ // Since vpop register list cannot have gaps, there may be multiple vpop
+ // instructions in the epilogue.
+ while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+ MBBI++;
+ }
+ if (AFI->getDPRCalleeSavedGapSize()) {
+ assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
+ "unexpected DPR alignment gap");
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
+ }
+
+ if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
+ if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
+ }
+
+ if (ArgRegsSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
+}
+
+/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
+/// debug info. It's the same as what we use for resolving the code-gen
+/// references for now. FIXME: This can go wrong when references are
+/// SP-relative and simple call frames aren't used.
+int
+ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+}
+
+int
+ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg,
+ int SPAdj) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ bool isFixed = MFI->isFixedObjectIndex(FI);
+
+ FrameReg = ARM::SP;
+ Offset += SPAdj;
+
+ // SP can move around if there are allocas. We may also lose track of SP
+ // when emergency spilling inside a non-reserved call frame setup.
+ bool hasMovingSP = !hasReservedCallFrame(MF);
+
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack/base pointer for locals.
+ if (RegInfo->needsStackRealignment(MF)) {
+ assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+ if (isFixed) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ Offset = FPOffset;
+ } else if (hasMovingSP) {
+ assert(RegInfo->hasBasePointer(MF) &&
+ "VLAs and dynamic stack alignment, but missing base pointer!");
+ FrameReg = RegInfo->getBaseRegister();
+ }
+ return Offset;
+ }
+
+ // If there is a frame pointer, use it when we can.
+ if (hasFP(MF) && AFI->hasStackFrame()) {
+ // Use frame pointer to reference fixed objects. Use it for locals if
+ // there are VLAs (and thus the SP isn't reliable as a base).
+ if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ } else if (hasMovingSP) {
+ assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
+ if (AFI->isThumb2Function()) {
+ // Try to use the frame pointer if we can, else use the base pointer
+ // since it's available. This is handy for the emergency spill slot, in
+ // particular.
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ }
+ } else if (AFI->isThumb2Function()) {
+ // Use add <rd>, sp, #<imm8>
+ // ldr <rd>, [sp, #<imm8>]
+ // if at all possible to save space.
+ if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
+ return Offset;
+ // In Thumb2 mode, the negative offset is very limited. Try to avoid
+ // out of range references. ldr <rt>,[<rn>, #-<imm8>]
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
+ // Otherwise, use SP or FP, whichever is closer to the stack slot.
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ }
+ // Use the base pointer if we have one.
+ if (RegInfo->hasBasePointer(MF))
+ FrameReg = RegInfo->getBaseRegister();
+ return Offset;
+}
+
+void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned StmOpc, unsigned StrOpc,
+ bool NoGap,
+ bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs,
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
+ DebugLoc DL;
+
+ SmallVector<std::pair<unsigned,bool>, 4> Regs;
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+ // D-registers in the aligned area DPRCS2 are NOT spilled here.
+ if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ continue;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for
+ // @llvm.returnaddress then it's already added to the function and
+ // entry block live-in sets.
+ bool isKill = true;
+ if (Reg == ARM::LR) {
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ // If NoGap is true, push consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+ LastReg = Reg;
+ Regs.push_back(std::make_pair(Reg, isKill));
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || StrOpc== 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
+ .addReg(ARM::SP).setMIFlags(MIFlags));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
+ } else if (Regs.size() == 1) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
+ ARM::SP)
+ .addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(ARM::SP).setMIFlags(MIFlags)
+ .addImm(-4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+
+ // Put any subsequent vpush instructions before this one: they will refer to
+ // higher register numbers so need to be pushed first in order to preserve
+ // monotonicity.
+ if (MI != MBB.begin())
+ --MI;
+ }
+}
+
+void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned LdmOpc, unsigned LdrOpc,
+ bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL;
+ bool isTailCall = false;
+ bool isInterrupt = false;
+ bool isTrap = false;
+ if (MBB.end() != MI) {
+ DL = MI->getDebugLoc();
+ unsigned RetOpcode = MI->getOpcode();
+ isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
+ isInterrupt =
+ RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+ isTrap =
+ RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
+ RetOpcode == ARM::tTRAP;
+ }
+
+ SmallVector<unsigned, 4> Regs;
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ bool DeleteRet = false;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+ // The aligned reloads from area DPRCS2 are not inserted here.
+ if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ continue;
+
+ if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
+ !isTrap && STI.hasV5TOps()) {
+ if (MBB.succ_empty()) {
+ Reg = ARM::PC;
+ DeleteRet = true;
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ } else
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
+ // Fold the return instruction into the LDM.
+ }
+
+ // If NoGap is true, pop consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+
+ LastReg = Reg;
+ Regs.push_back(Reg);
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || LdrOpc == 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
+ .addReg(ARM::SP));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i], getDefRegState(true));
+ if (DeleteRet && MI != MBB.end()) {
+ MIB.copyImplicitOps(&*MI);
+ MI->eraseFromParent();
+ }
+ MI = MIB;
+ } else if (Regs.size() == 1) {
+ // If we adjusted the reg to PC from LR above, switch it back here. We
+ // only do that for LDM.
+ if (Regs[0] == ARM::PC)
+ Regs[0] = ARM::LR;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP);
+ // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+ // that refactoring is complete (eventually).
+ if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
+ MIB.addReg(0);
+ MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
+ } else
+ MIB.addImm(4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+
+ // Put any subsequent vpop instructions after this one: they will refer to
+ // higher register numbers so need to be popped afterwards.
+ if (MI != MBB.end())
+ ++MI;
+ }
+}
+
+/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
+/// starting from d8. Also insert stack realignment code and leave the stack
+/// pointer pointing to the d8 spill slot.
+static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ // Mark the D-register spill slots as properly aligned. Since MFI computes
+ // stack slot layout backwards, this can actually mean that the d-reg stack
+ // slot offsets can be wrong. The offset for d8 will always be correct.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned DNum = CSI[i].getReg() - ARM::D8;
+ if (DNum > NumAlignedDPRCS2Regs - 1)
+ continue;
+ int FI = CSI[i].getFrameIdx();
+ // The even-numbered registers will be 16-byte aligned, the odd-numbered
+ // registers will be 8-byte aligned.
+ MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
+
+ // The stack slot for D8 needs to be maximally aligned because this is
+ // actually the point where we align the stack pointer. MachineFrameInfo
+ // computes all offsets relative to the incoming stack pointer which is a
+ // bit weird when realigning the stack. Any extra padding for this
+ // over-alignment is not realized because the code inserted below adjusts
+ // the stack pointer by numregs * 8 before aligning the stack pointer.
+ if (DNum == 0)
+ MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
+ }
+
+ // Move the stack pointer to the d8 spill slot, and align it at the same
+ // time. Leave the stack slot address in the scratch register r4.
+ //
+ // sub r4, sp, #numregs * 8
+ // bic r4, r4, #align - 1
+ // mov sp, r4
+ //
+ bool isThumb = AFI->isThumbFunction();
+ assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+ AFI->setShouldRestoreSPFromFP(true);
+
+ // sub r4, sp, #numregs * 8
+ // The immediate is <= 64, so it doesn't need any special encoding.
+ unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addReg(ARM::SP)
+ .addImm(8 * NumAlignedDPRCS2Regs)));
+
+ unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
+ // We must set parameter MustBeSingleInstruction to true, since
+ // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
+ // stack alignment. Luckily, this can always be done since all ARM
+ // architecture versions that support Neon also support the BFC
+ // instruction.
+ emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
+
+ // mov sp, r4
+ // The stack pointer must be adjusted before spilling anything, otherwise
+ // the stack slots could be clobbered by an interrupt handler.
+ // Leave r4 live, it is used below.
+ Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
+ .addReg(ARM::R4);
+ MIB = AddDefaultPred(MIB);
+ if (!isThumb)
+ AddDefaultCC(MIB);
+
+ // Now spill NumAlignedDPRCS2Regs registers starting from d8.
+ // r4 holds the stack slot address.
+ unsigned NextReg = ARM::D8;
+
+ // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
+ // The writeback is only needed when emitting two vst1.64 instructions.
+ if (NumAlignedDPRCS2Regs >= 6) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ &ARM::QQPRRegClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
+ ARM::R4)
+ .addReg(ARM::R4, RegState::Kill).addImm(16)
+ .addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // We won't modify r4 beyond this point. It currently points to the next
+ // register to be spilled.
+ unsigned R4BaseReg = NextReg;
+
+ // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
+ if (NumAlignedDPRCS2Regs >= 4) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ &ARM::QQPRRegClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
+ .addReg(ARM::R4).addImm(16).addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // 16-byte aligned vst1.64 with 2 d-regs.
+ if (NumAlignedDPRCS2Regs >= 2) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ &ARM::QPRRegClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
+ .addReg(ARM::R4).addImm(16).addReg(SupReg));
+ NextReg += 2;
+ NumAlignedDPRCS2Regs -= 2;
+ }
+
+ // Finally, use a vanilla vstr.64 for the odd last register.
+ if (NumAlignedDPRCS2Regs) {
+ MBB.addLiveIn(NextReg);
+ // vstr.64 uses addrmode5 which has an offset scale of 4.
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
+ .addReg(NextReg)
+ .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
+ }
+
+ // The last spill instruction inserted should kill the scratch register r4.
+ std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
+}
+
+/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
+/// iterator to the following instruction.
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs) {
+ // sub r4, sp, #numregs * 8
+ // bic r4, r4, #align - 1
+ // mov sp, r4
+ ++MI; ++MI; ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+
+ // These switches all fall through.
+ switch(NumAlignedDPRCS2Regs) {
+ case 7:
+ ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+ default:
+ ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+ case 1:
+ case 2:
+ case 4:
+ assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
+ ++MI;
+ }
+ return MI;
+}
+
+/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
+/// starting from d8. These instructions are assumed to execute while the
+/// stack is still aligned, unlike the code inserted by emitPopInst.
+static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
+ // Find the frame index assigned to d8.
+ int D8SpillFI = 0;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ if (CSI[i].getReg() == ARM::D8) {
+ D8SpillFI = CSI[i].getFrameIdx();
+ break;
+ }
+
+ // Materialize the address of the d8 spill slot into the scratch register r4.
+ // This can be fairly complicated if the stack frame is large, so just use
+ // the normal frame index elimination mechanism to do it. This code runs as
+ // the initial part of the epilog where the stack and base pointers haven't
+ // been changed yet.
+ bool isThumb = AFI->isThumbFunction();
+ assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addFrameIndex(D8SpillFI).addImm(0)));
+
+ // Now restore NumAlignedDPRCS2Regs registers starting from d8.
+ unsigned NextReg = ARM::D8;
+
+ // 16-byte aligned vld1.64 with 4 d-regs and writeback.
+ if (NumAlignedDPRCS2Regs >= 6) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ &ARM::QQPRRegClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
+ .addReg(ARM::R4, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill).addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // We won't modify r4 beyond this point. It currently points to the next
+ // register to be spilled.
+ unsigned R4BaseReg = NextReg;
+
+ // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
+ if (NumAlignedDPRCS2Regs >= 4) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ &ARM::QQPRRegClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
+ .addReg(ARM::R4).addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // 16-byte aligned vld1.64 with 2 d-regs.
+ if (NumAlignedDPRCS2Regs >= 2) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ &ARM::QPRRegClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
+ .addReg(ARM::R4).addImm(16));
+ NextReg += 2;
+ NumAlignedDPRCS2Regs -= 2;
+ }
+
+ // Finally, use a vanilla vldr.64 for the remaining odd register.
+ if (NumAlignedDPRCS2Regs)
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
+ .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
+
+ // Last store kills r4.
+ std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
+}
+
+bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
+ unsigned PushOneOpc = AFI->isThumbFunction() ?
+ ARM::t2STR_PRE : ARM::STR_PRE_IMM;
+ unsigned FltOpc = ARM::VSTMDDB_UPD;
+ unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
+ MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
+ MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+
+ // The code above does not insert spill code for the aligned DPRCS2 registers.
+ // The stack realignment code will be inserted between the push instructions
+ // and these spills.
+ if (NumAlignedDPRCS2Regs)
+ emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
+
+ return true;
+}
+
+bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isVarArg = AFI->getArgRegsSaveSize() > 0;
+ unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+
+ // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
+ // registers. Do that here instead.
+ if (NumAlignedDPRCS2Regs)
+ emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
+
+ unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
+ unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
+ unsigned FltOpc = ARM::VLDMDIA_UPD;
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea2Register, 0);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea1Register, 0);
+
+ return true;
+}
+
+// FIXME: Make generic?
+static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
+ const ARMBaseInstrInfo &TII) {
+ unsigned FnSize = 0;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB)
+ FnSize += TII.GetInstSizeInBytes(&MI);
+ }
+ return FnSize;
+}
+
+/// estimateRSStackSizeLimit - Look at each instruction that references stack
+/// frames and return the stack size limit beyond which some of these
+/// instructions will require a scratch register during their expansion later.
+// FIXME: Move to TII?
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
+ const TargetFrameLowering *TFI) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned Limit = (1 << 12) - 1;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isFI())
+ continue;
+
+ // When using ADDri to get the address of a stack object, 255 is the
+ // largest offset guaranteed to fit in the immediate offset.
+ if (MI.getOpcode() == ARM::ADDri) {
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ }
+
+ // Otherwise check the addressing mode.
+ switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
+ case ARMII::AddrMode3:
+ case ARMII::AddrModeT2_i8:
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode5:
+ case ARMII::AddrModeT2_i8s4:
+ Limit = std::min(Limit, ((1U << 8) - 1) * 4);
+ break;
+ case ARMII::AddrModeT2_i12:
+ // i12 supports only positive offset so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (TFI->hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Addressing modes 4 & 6 (load/store) instructions can't encode an
+ // immediate offset for stack references.
+ return 0;
+ default:
+ break;
+ }
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+
+// In functions that realign the stack, it can be an advantage to spill the
+// callee-saved vector registers after realigning the stack. The vst1 and vld1
+// instructions take alignment hints that can improve performance.
+//
+static void
+checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
+ MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
+ if (!SpillAlignedNEONRegs)
+ return;
+
+ // Naked functions don't spill callee-saved registers.
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
+ return;
+
+ // We are planning to use NEON instructions vst1 / vld1.
+ if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
+ return;
+
+ // Don't bother if the default stack alignment is sufficiently high.
+ if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
+ return;
+
+ // Aligned spills require stack realignment.
+ if (!static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
+ return;
+
+ // We always spill contiguous d-registers starting from d8. Count how many
+ // needs spilling. The register allocator will almost always use the
+ // callee-saved registers in order, but it can happen that there are holes in
+ // the range. Registers above the hole will be spilled to the standard DPRCS
+ // area.
+ unsigned NumSpills = 0;
+ for (; NumSpills < 8; ++NumSpills)
+ if (!SavedRegs.test(ARM::D8 + NumSpills))
+ break;
+
+ // Don't do this for just one d-register. It's not worth it.
+ if (NumSpills < 2)
+ return;
+
+ // Spill the first NumSpills D-registers after realigning the stack.
+ MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
+
+ // A scratch register is required for the vst1 / vld1 instructions.
+ SavedRegs.set(ARM::R4);
+}
+
+void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+ // This tells PEI to spill the FP as if it is any other callee-save register
+ // to take advantage the eliminateFrameIndex machinery. This also ensures it
+ // is spilled in the order specified by getCalleeSavedRegs() to make it easier
+ // to combine multiple loads / stores.
+ bool CanEliminateFrame = true;
+ bool CS1Spilled = false;
+ bool LRSpilled = false;
+ unsigned NumGPRSpills = 0;
+ SmallVector<unsigned, 4> UnspilledCS1GPRs;
+ SmallVector<unsigned, 4> UnspilledCS2GPRs;
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Spill R4 if Thumb2 function requires stack realignment - it will be used as
+ // scratch register. Also spill R4 if Thumb2 function has varsized objects,
+ // since it's not always possible to restore sp from fp in a single
+ // instruction.
+ // FIXME: It will be better just to find spare register here.
+ if (AFI->isThumb2Function() &&
+ (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
+ SavedRegs.set(ARM::R4);
+
+ if (AFI->isThumb1OnlyFunction()) {
+ // Spill LR if Thumb1 function uses variable length argument lists.
+ if (AFI->getArgRegsSaveSize() > 0)
+ SavedRegs.set(ARM::LR);
+
+ // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
+ // for sure what the stack size will be, but for this, an estimate is good
+ // enough. If there anything changes it, it'll be a spill, which implies
+ // we've used all the registers and so R4 is already used, so not marking
+ // it here will be OK.
+ // FIXME: It will be better just to find spare register here.
+ unsigned StackSize = MFI->estimateStackSize(MF);
+ if (MFI->hasVarSizedObjects() || StackSize > 508)
+ SavedRegs.set(ARM::R4);
+ }
+
+ // See if we can spill vector registers to aligned stack.
+ checkNumAlignedDPRCS2Regs(MF, SavedRegs);
+
+ // Spill the BasePtr if it's used.
+ if (RegInfo->hasBasePointer(MF))
+ SavedRegs.set(RegInfo->getBaseRegister());
+
+ // Don't spill FP if the frame can be eliminated. This is determined
+ // by scanning the callee-save registers to see if any is modified.
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ bool Spilled = false;
+ if (SavedRegs.test(Reg)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ }
+
+ if (!ARM::GPRRegClass.contains(Reg))
+ continue;
+
+ if (Spilled) {
+ NumGPRSpills++;
+
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ CS1Spilled = true;
+ continue;
+ }
+
+ // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R0: case ARM::R1:
+ case ARM::R2: case ARM::R3:
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R0: case ARM::R1:
+ case ARM::R2: case ARM::R3:
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
+ }
+ }
+ }
+
+ bool ForceLRSpill = false;
+ if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
+ unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
+ // Force LR to be spilled if the Thumb function size is > 2048. This enables
+ // use of BL to implement far jump. If it turns out that it's not needed
+ // then the branch fix up path will undo it.
+ if (FnSize >= (1 << 11)) {
+ CanEliminateFrame = false;
+ ForceLRSpill = true;
+ }
+ }
+
+ // If any of the stack slot references may be out of range of an immediate
+ // offset, make sure a register (or a spill slot) is available for the
+ // register scavenger. Note that if we're indexing off the frame pointer, the
+ // effective stack size is 4 bytes larger since the FP points to the stack
+ // slot of the previous FP. Also, if we have variable sized objects in the
+ // function, stack slot references will often be negative, and some of
+ // our instructions are positive-offset only, so conservatively consider
+ // that case to want a spill slot (or register) as well. Similarly, if
+ // the function adjusts the stack pointer during execution and the
+ // adjustments aren't already part of our stack size estimate, our offset
+ // calculations may be off, so be conservative.
+ // FIXME: We could add logic to be more precise about negative offsets
+ // and which instructions will need a scratch register for them. Is it
+ // worth the effort and added fragility?
+ bool BigStack = (RS && (MFI->estimateStackSize(MF) +
+ ((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >=
+ estimateRSStackSizeLimit(MF, this))) ||
+ MFI->hasVarSizedObjects() ||
+ (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+
+ bool ExtraCSSpill = false;
+ if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+ AFI->setHasStackFrame(true);
+
+ // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
+ // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
+ if (!LRSpilled && CS1Spilled) {
+ SavedRegs.set(ARM::LR);
+ NumGPRSpills++;
+ SmallVectorImpl<unsigned>::iterator LRPos;
+ LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
+ (unsigned)ARM::LR);
+ if (LRPos != UnspilledCS1GPRs.end())
+ UnspilledCS1GPRs.erase(LRPos);
+
+ ForceLRSpill = false;
+ ExtraCSSpill = true;
+ }
+
+ if (hasFP(MF)) {
+ SavedRegs.set(FramePtr);
+ auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
+ FramePtr);
+ if (FPPos != UnspilledCS1GPRs.end())
+ UnspilledCS1GPRs.erase(FPPos);
+ NumGPRSpills++;
+ }
+
+ // If stack and double are 8-byte aligned and we are spilling an odd number
+ // of GPRs, spill one extra callee save GPR so we won't have to pad between
+ // the integer and double callee save areas.
+ unsigned TargetAlign = getStackAlignment();
+ if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
+ if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+ for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+ unsigned Reg = UnspilledCS1GPRs[i];
+ // Don't spill high register if the function is thumb. In the case of
+ // Windows on ARM, accept R11 (frame pointer)
+ if (!AFI->isThumbFunction() ||
+ (STI.isTargetWindows() && Reg == ARM::R11) ||
+ isARMLowRegister(Reg) || Reg == ARM::LR) {
+ SavedRegs.set(Reg);
+ if (!MRI.isReserved(Reg))
+ ExtraCSSpill = true;
+ break;
+ }
+ }
+ } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
+ unsigned Reg = UnspilledCS2GPRs.front();
+ SavedRegs.set(Reg);
+ if (!MRI.isReserved(Reg))
+ ExtraCSSpill = true;
+ }
+ }
+
+ // Estimate if we might need to scavenge a register at some point in order
+ // to materialize a stack offset. If so, either spill one additional
+ // callee-saved register or reserve a special spill slot to facilitate
+ // register scavenging. Thumb1 needs a spill slot for stack pointer
+ // adjustments also, even when the frame itself is small.
+ if (BigStack && !ExtraCSSpill) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!MRI.isReserved(Reg) &&
+ (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+ Reg == ARM::LR)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ // For non-Thumb1 functions, also check for hi-reg CS registers
+ if (!AFI->isThumb1OnlyFunction()) {
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
+ if (!MRI.isReserved(Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ SavedRegs.set(Extras[i]);
+ }
+ } else if (!AFI->isThumb1OnlyFunction()) {
+ // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
+ // closest to SP or frame pointer.
+ const TargetRegisterClass *RC = &ARM::GPRRegClass;
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+ }
+ }
+
+ if (ForceLRSpill) {
+ SavedRegs.set(ARM::LR);
+ AFI->setLRIsSpilledForFarJump(true);
+ }
+}
+
+
+void ARMFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = alignSPAdjust(Amount);
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateCallFramePseudoInstr does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ int PIdx = Old->findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+ unsigned PredReg = Old->getOperand(2).getReg();
+ emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
+ Pred, PredReg);
+ } else {
+ // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+ unsigned PredReg = Old->getOperand(3).getReg();
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
+ Pred, PredReg);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+/// Get the minimum constant for ARM that is greater than or equal to the
+/// argument. In ARM, constants can have any value that can be produced by
+/// rotating an 8-bit value to the right by an even number of bits within a
+/// 32-bit word.
+static uint32_t alignToARMConstant(uint32_t Value) {
+ unsigned Shifted = 0;
+
+ if (Value == 0)
+ return 0;
+
+ while (!(Value & 0xC0000000)) {
+ Value = Value << 2;
+ Shifted += 2;
+ }
+
+ bool Carry = (Value & 0x00FFFFFF);
+ Value = ((Value & 0xFF000000) >> 24) + Carry;
+
+ if (Value & 0x0000100)
+ Value = Value & 0x000001FC;
+
+ if (Shifted > 24)
+ Value = Value >> (Shifted - 24);
+ else
+ Value = Value << (24 - Shifted);
+
+ return Value;
+}
+
+// The stack limit in the TCB is set to this many bytes above the actual
+// stack limit.
+static const uint64_t kSplitStackAvailable = 256;
+
+// Adjust the function prologue to enable split stacks. This currently only
+// supports android and linux.
+//
+// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
+// must be well defined in order to allow for consistent implementations of the
+// __morestack helper function. The ABI is also not a normal ABI in that it
+// doesn't follow the normal calling conventions because this allows the
+// prologue of each function to be optimized further.
+//
+// Currently, the ABI looks like (when calling __morestack)
+//
+// * r4 holds the minimum stack size requested for this function call
+// * r5 holds the stack size of the arguments to the function
+// * the beginning of the function is 3 instructions after the call to
+// __morestack
+//
+// Implementations of __morestack should use r4 to allocate a new stack, r5 to
+// place the arguments on to the new stack, and the 3-instruction knowledge to
+// jump directly to the body of the function when working on the new stack.
+//
+// An old (and possibly no longer compatible) implementation of __morestack for
+// ARM can be found at [1].
+//
+// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
+void ARMFrameLowering::adjustForSegmentedStacks(
+ MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
+ unsigned Opcode;
+ unsigned CFIIndex;
+ const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
+ bool Thumb = ST->isThumb();
+
+ // Sadly, this currently doesn't support varargs, platforms other than
+ // android/linux. Note that thumb1/thumb2 are support for android/linux.
+ if (MF.getFunction()->isVarArg())
+ report_fatal_error("Segmented stacks do not support vararg functions.");
+ if (!ST->isTargetAndroid() && !ST->isTargetLinux())
+ report_fatal_error("Segmented stacks not supported on this platform.");
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ MCContext &Context = MMI.getContext();
+ const MCRegisterInfo *MRI = Context.getRegisterInfo();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL;
+
+ uint64_t StackSize = MFI->getStackSize();
+
+ // Do not generate a prologue for functions with a stack of size zero
+ if (StackSize == 0)
+ return;
+
+ // Use R4 and R5 as scratch registers.
+ // We save R4 and R5 before use and restore them before leaving the function.
+ unsigned ScratchReg0 = ARM::R4;
+ unsigned ScratchReg1 = ARM::R5;
+ uint64_t AlignedStackSize;
+
+ MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
+
+ // Grab everything that reaches PrologueMBB to update there liveness as well.
+ SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
+ SmallVector<MachineBasicBlock *, 2> WalkList;
+ WalkList.push_back(&PrologueMBB);
+
+ do {
+ MachineBasicBlock *CurMBB = WalkList.pop_back_val();
+ for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
+ if (BeforePrologueRegion.insert(PredBB).second)
+ WalkList.push_back(PredBB);
+ }
+ } while (!WalkList.empty());
+
+ // The order in that list is important.
+ // The blocks will all be inserted before PrologueMBB using that order.
+ // Therefore the block that should appear first in the CFG should appear
+ // first in the list.
+ MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
+ PostStackMBB};
+
+ for (MachineBasicBlock *B : AddedBlocks)
+ BeforePrologueRegion.insert(B);
+
+ for (const auto &LI : PrologueMBB.liveins()) {
+ for (MachineBasicBlock *PredBB : BeforePrologueRegion)
+ PredBB->addLiveIn(LI);
+ }
+
+ // Remove the newly added blocks from the list, since we know
+ // we do not have to do the following updates for them.
+ for (MachineBasicBlock *B : AddedBlocks) {
+ BeforePrologueRegion.erase(B);
+ MF.insert(PrologueMBB.getIterator(), B);
+ }
+
+ for (MachineBasicBlock *MBB : BeforePrologueRegion) {
+ // Make sure the LiveIns are still sorted and unique.
+ MBB->sortUniqueLiveIns();
+ // Replace the edges to PrologueMBB by edges to the sequences
+ // we are about to add.
+ MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
+ }
+
+ // The required stack size that is aligned to ARM constant criterion.
+ AlignedStackSize = alignToARMConstant(StackSize);
+
+ // When the frame size is less than 256 we just compare the stack
+ // boundary directly to the value of the stack pointer, per gcc.
+ bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
+
+ // We will use two of the callee save registers as scratch registers so we
+ // need to save those registers onto the stack.
+ // We will use SR0 to hold stack limit and SR1 to hold the stack size
+ // requested and arguments for __morestack().
+ // SR0: Scratch Register #0
+ // SR1: Scratch Register #1
+ // push {SR0, SR1}
+ if (Thumb) {
+ AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)))
+ .addReg(ScratchReg0).addReg(ScratchReg1);
+ } else {
+ AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
+ .addReg(ARM::SP, RegState::Define).addReg(ARM::SP))
+ .addReg(ScratchReg0).addReg(ScratchReg1);
+ }
+
+ // Emit the relevant DWARF information about the change in stack pointer as
+ // well as where to find both r4 and r5 (the callee-save registers)
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
+ BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
+ BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
+ BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // mov SR1, sp
+ if (Thumb) {
+ AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
+ .addReg(ARM::SP));
+ } else if (CompareStackPointer) {
+ AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
+ .addReg(ARM::SP)).addReg(0);
+ }
+
+ // sub SR1, sp, #StackSize
+ if (!CompareStackPointer && Thumb) {
+ AddDefaultPred(
+ AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1))
+ .addReg(ScratchReg1).addImm(AlignedStackSize));
+ } else if (!CompareStackPointer) {
+ AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
+ .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
+ }
+
+ if (Thumb && ST->isThumb1Only()) {
+ unsigned PCLabelId = ARMFI->createPICLabelUId();
+ ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
+ MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0);
+ MachineConstantPool *MCP = MF.getConstantPool();
+ unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
+
+ // ldr SR0, [pc, offset(STACK_LIMIT)]
+ AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
+ .addConstantPoolIndex(CPI));
+
+ // ldr SR0, [SR0]
+ AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
+ .addReg(ScratchReg0).addImm(0));
+ } else {
+ // Get TLS base address from the coprocessor
+ // mrc p15, #0, SR0, c13, c0, #3
+ AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
+ .addImm(15)
+ .addImm(0)
+ .addImm(13)
+ .addImm(0)
+ .addImm(3));
+
+ // Use the last tls slot on android and a private field of the TCP on linux.
+ assert(ST->isTargetAndroid() || ST->isTargetLinux());
+ unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
+
+ // Get the stack limit from the right offset
+ // ldr SR0, [sr0, #4 * TlsOffset]
+ AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
+ .addReg(ScratchReg0).addImm(4 * TlsOffset));
+ }
+
+ // Compare stack limit with stack size requested.
+ // cmp SR0, SR1
+ Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
+ AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1));
+
+ // This jump is taken if StackLimit < SP - stack required.
+ Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
+ BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
+ .addImm(ARMCC::LO)
+ .addReg(ARM::CPSR);
+
+
+ // Calling __morestack(StackSize, Size of stack arguments).
+ // __morestack knows that the stack size requested is in SR0(r4)
+ // and amount size of stack arguments is in SR1(r5).
+
+ // Pass first argument for the __morestack by Scratch Register #0.
+ // The amount size of stack required
+ if (Thumb) {
+ AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8),
+ ScratchReg0)).addImm(AlignedStackSize));
+ } else {
+ AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
+ .addImm(AlignedStackSize)).addReg(0);
+ }
+ // Pass second argument for the __morestack by Scratch Register #1.
+ // The amount size of stack consumed to save function arguments.
+ if (Thumb) {
+ AddDefaultPred(
+ AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1))
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())));
+ } else {
+ AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())))
+ .addReg(0);
+ }
+
+ // push {lr} - Save return address of this function.
+ if (Thumb) {
+ AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)))
+ .addReg(ARM::LR);
+ } else {
+ AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP))
+ .addReg(ARM::LR);
+ }
+
+ // Emit the DWARF info about the change in stack as well as where to find the
+ // previous link register
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
+ BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
+ BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // Call __morestack().
+ if (Thumb) {
+ AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL)))
+ .addExternalSymbol("__morestack");
+ } else {
+ BuildMI(AllocMBB, DL, TII.get(ARM::BL))
+ .addExternalSymbol("__morestack");
+ }
+
+ // pop {lr} - Restore return address of this original function.
+ if (Thumb) {
+ if (ST->isThumb1Only()) {
+ AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
+ .addReg(ScratchReg0);
+ AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
+ .addReg(ScratchReg0));
+ } else {
+ AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .addImm(4));
+ }
+ } else {
+ AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP))
+ .addReg(ARM::LR);
+ }
+
+ // Restore SR0 and SR1 in case of __morestack() was called.
+ // __morestack() will skip PostStackMBB block so we need to restore
+ // scratch registers from here.
+ // pop {SR0, SR1}
+ if (Thumb) {
+ AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
+ } else {
+ AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
+ }
+
+ // Update the CFA offset now that we've popped
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
+ BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // bx lr - Return from this function.
+ Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET;
+ AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode)));
+
+ // Restore SR0 and SR1 in case of __morestack() was not called.
+ // pop {SR0, SR1}
+ if (Thumb) {
+ AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
+ } else {
+ AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
+ }
+
+ // Update the CFA offset now that we've popped
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
+ BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // Tell debuggers that r4 and r5 are now the same as they were in the
+ // previous function, that they're the "Same Value".
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
+ nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
+ BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
+ nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
+ BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // Organizing MBB lists
+ PostStackMBB->addSuccessor(&PrologueMBB);
+
+ AllocMBB->addSuccessor(PostStackMBB);
+
+ GetMBB->addSuccessor(PostStackMBB);
+ GetMBB->addSuccessor(AllocMBB);
+
+ McrMBB->addSuccessor(GetMBB);
+
+ PrevStackMBB->addSuccessor(McrMBB);
+
+#ifdef XDEBUG
+ MF.verify();
+#endif
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
new file mode 100644
index 0000000..66f4dfb
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -0,0 +1,85 @@
+//==-- ARMTargetFrameLowering.h - Define frame lowering for ARM --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
+#define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMFrameLowering : public TargetFrameLowering {
+protected:
+ const ARMSubtarget &STI;
+
+public:
+ explicit ARMFrameLowering(const ARMSubtarget &sti);
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool noFramePointerElim(const MachineFunction &MF) const override;
+
+ bool hasFP(const MachineFunction &MF) const override;
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
+ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
+ int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg, int SPAdj) const;
+
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
+
+ void adjustForSegmentedStacks(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override;
+
+ /// Returns true if the target will correctly handle shrink wrapping.
+ bool enableShrinkWrapping(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ private:
+ void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
+ unsigned StrOpc, bool NoGap,
+ bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs,
+ unsigned MIFlags = 0) const;
+ void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
+ unsigned LdrOpc, bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs) const;
+
+ void
+ eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
new file mode 100644
index 0000000..0157c0a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -0,0 +1,102 @@
+//===-- ARMHazardRecognizer.cpp - ARM postra hazard recognizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMHazardRecognizer.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
+ const TargetRegisterInfo &TRI) {
+ // FIXME: Detect integer instructions properly.
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (MI->mayStore())
+ return false;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+ return false;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead");
+
+ MachineInstr *MI = SU->getInstr();
+
+ if (!MI->isDebugValue()) {
+ // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
+ // a VMLA / VMLS will cause 4 cycle stall.
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
+ MachineInstr *DefMI = LastMI;
+ const MCInstrDesc &LastMCID = LastMI->getDesc();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+ MF->getSubtarget().getInstrInfo());
+
+ // Skip over one non-VFP / NEON instruction.
+ if (!LastMI->isBarrier() &&
+ // On A9, AGU and NEON/FPU are muxed.
+ !(TII.getSubtarget().isLikeA9() && LastMI->mayLoadOrStore()) &&
+ (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
+ MachineBasicBlock::iterator I = LastMI;
+ if (I != LastMI->getParent()->begin()) {
+ I = std::prev(I);
+ DefMI = &*I;
+ }
+ }
+
+ if (TII.isFpMLxInstruction(DefMI->getOpcode()) &&
+ (TII.canCauseFpMLxStall(MI->getOpcode()) ||
+ hasRAWHazard(DefMI, MI, TII.getRegisterInfo()))) {
+ // Try to schedule another instruction for the next 4 cycles.
+ if (FpMLxStalls == 0)
+ FpMLxStalls = 4;
+ return Hazard;
+ }
+ }
+ }
+
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void ARMHazardRecognizer::Reset() {
+ LastMI = nullptr;
+ FpMLxStalls = 0;
+ ScoreboardHazardRecognizer::Reset();
+}
+
+void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+ if (!MI->isDebugValue()) {
+ LastMI = MI;
+ FpMLxStalls = 0;
+ }
+
+ ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void ARMHazardRecognizer::AdvanceCycle() {
+ if (FpMLxStalls && --FpMLxStalls == 0)
+ // Stalled for 4 cycles but still can't schedule any other instructions.
+ LastMI = nullptr;
+ ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void ARMHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("reverse ARM hazard checking unsupported");
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
new file mode 100644
index 0000000..ccf09db
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -0,0 +1,49 @@
+//===-- ARMHazardRecognizer.h - ARM Hazard Recognizers ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling ARM functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
+#define LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+
+namespace llvm {
+
+class ARMBaseInstrInfo;
+class ARMBaseRegisterInfo;
+class ARMSubtarget;
+class MachineInstr;
+
+/// ARMHazardRecognizer handles special constraints that are not expressed in
+/// the scheduling itinerary. This is only used during postRA scheduling. The
+/// ARM preRA scheduler uses an unspecialized instance of the
+/// ScoreboardHazardRecognizer.
+class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
+ MachineInstr *LastMI;
+ unsigned FpMLxStalls;
+
+public:
+ ARMHazardRecognizer(const InstrItineraryData *ItinData,
+ const ScheduleDAG *DAG)
+ : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"),
+ LastMI(nullptr) {}
+
+ HazardType getHazardType(SUnit *SU, int Stalls) override;
+ void Reset() override;
+ void EmitInstruction(SUnit *SU) override;
+ void AdvanceCycle() override;
+ void RecedeCycle() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
new file mode 100644
index 0000000..0242440
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -0,0 +1,3954 @@
+//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-isel"
+
+static cl::opt<bool>
+DisableShifterOp("disable-shifter-op", cl::Hidden,
+ cl::desc("Disable isel of shifter-op"),
+ cl::init(false));
+
+static cl::opt<bool>
+CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
+ cl::desc("Check fp vmla / vmls hazard at isel time"),
+ cl::init(true));
+
+//===--------------------------------------------------------------------===//
+/// ARMDAGToDAGISel - ARM specific code to select ARM machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+
+enum AddrMode2Type {
+ AM2_BASE, // Simple AM2 (+-imm12)
+ AM2_SHOP // Shifter-op AM2
+};
+
+class ARMDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Reset the subtarget each time through.
+ Subtarget = &MF.getSubtarget<ARMSubtarget>();
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
+ const char *getPassName() const override {
+ return "ARM Instruction Selection";
+ }
+
+ void PreprocessISelDAG() override;
+
+ /// getI32Imm - Return a target constant of type i32 with the specified
+ /// value.
+ inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+ }
+
+ SDNode *Select(SDNode *N) override;
+
+
+ bool hasNoVMLxHazardUse(SDNode *N) const;
+ bool isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
+ bool SelectRegShifterOperand(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C,
+ bool CheckProfitability = true);
+ bool SelectImmShifterOperand(SDValue N, SDValue &A,
+ SDValue &B, bool CheckProfitability = true);
+ bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C) {
+ // Don't apply the profitability check
+ return SelectRegShifterOperand(N, A, B, C, false);
+ }
+ bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
+ SDValue &B) {
+ // Don't apply the profitability check
+ return SelectImmShifterOperand(N, A, B, false);
+ }
+
+ bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
+
+ AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
+ }
+
+ bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
+ }
+
+ bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ SelectAddrMode2Worker(N, Base, Offset, Opc);
+// return SelectAddrMode2ShOp(N, Base, Offset, Opc);
+ // This always matches one way or another.
+ return true;
+ }
+
+ bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
+ const ConstantSDNode *CN = cast<ConstantSDNode>(N);
+ Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
+ Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
+ return true;
+ }
+
+ bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
+ bool SelectAddrMode3(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode5(SDValue N, SDValue &Base,
+ SDValue &Offset);
+ bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+ bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
+
+ bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
+
+ // Thumb Addressing Modes:
+ bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
+
+ // Thumb 2 Addressing Modes:
+ bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm);
+ bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
+ SDValue &OffReg, SDValue &ShImm);
+ bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
+
+ inline bool is_so_imm(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }
+
+ inline bool is_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(~Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(~Imm) != -1;
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "ARMGenDAGISel.inc"
+
+private:
+ /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
+ /// ARM.
+ SDNode *SelectARMIndexedLoad(SDNode *N);
+ SDNode *SelectT2IndexedLoad(SDNode *N);
+
+ /// SelectVLD - Select NEON load intrinsics. NumVecs should be
+ /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// loads of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
+ SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
+
+ /// SelectVST - Select NEON store intrinsics. NumVecs should
+ /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// stores of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
+ SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
+
+ /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
+ /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// load/store of D registers and Q registers.
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes, const uint16_t *QOpcodes);
+
+ /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
+ /// should be 2, 3 or 4. The opcode array specifies the instructions used
+ /// for loading D registers. (Q registers are not supported.)
+ SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcodes);
+
+ /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
+ /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
+ /// generated to force the table registers to be consecutive.
+ SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
+
+ /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
+ SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
+
+ // Select special operations if node forms integer ABS pattern
+ SDNode *SelectABSOp(SDNode *N);
+
+ SDNode *SelectReadRegister(SDNode *N);
+ SDNode *SelectWriteRegister(SDNode *N);
+
+ SDNode *SelectInlineAsm(SDNode *N);
+
+ SDNode *SelectConcatVector(SDNode *N);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) override;
+
+ // Form pairs of consecutive R, S, D, or Q registers.
+ SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
+
+ // Form sequences of 4 consecutive S, D, or Q registers.
+ SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+ // Get the alignment operand for a NEON VLD or VST instruction.
+ SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
+ bool is64BitVector);
+
+ /// Returns the number of instructions required to materialize the given
+ /// constant in a register, or 3 if a literal pool load is needed.
+ unsigned ConstantMaterializationCost(unsigned Val) const;
+
+ /// Checks if N is a multiplication by a constant where we can extract out a
+ /// power of two from the constant so that it can be used in a shift, but only
+ /// if it simplifies the materialization of the constant. Returns true if it
+ /// is, and assigns to PowerOfTwo the power of two that should be extracted
+ /// out and to NewMulConst the new constant to be multiplied by.
+ bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
+ unsigned &PowerOfTwo, SDValue &NewMulConst) const;
+
+ /// Replace N with M in CurDAG, in a way that also ensures that M gets
+ /// selected when N would have been selected.
+ void replaceDAGValue(const SDValue &N, SDValue M);
+};
+}
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if a constant operand.
+// If so Imm will receive the 32 bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has a immediate integer right operand.
+// If so Imm will receive the 32 bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc &&
+ isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
+/// \brief Check whether a particular node is a constant value representable as
+/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
+///
+/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
+static bool isScaledConstantInRange(SDValue Node, int Scale,
+ int RangeMin, int RangeMax,
+ int &ScaledConstant) {
+ assert(Scale > 0 && "Invalid scale!");
+
+ // Check that this is a constant.
+ const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
+ if (!C)
+ return false;
+
+ ScaledConstant = (int) C->getZExtValue();
+ if ((ScaledConstant % Scale) != 0)
+ return false;
+
+ ScaledConstant /= Scale;
+ return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
+}
+
+void ARMDAGToDAGISel::PreprocessISelDAG() {
+ if (!Subtarget->hasV6T2Ops())
+ return;
+
+ bool isThumb2 = Subtarget->isThumb();
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
+
+ if (N->getOpcode() != ISD::ADD)
+ continue;
+
+ // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
+ // leading zeros, followed by consecutive set bits, followed by 1 or 2
+ // trailing zeros, e.g. 1020.
+ // Transform the expression to
+ // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
+ // of trailing zeros of c2. The left shift would be folded as an shifter
+ // operand of 'add' and the 'and' and 'srl' would become a bits extraction
+ // node (UBFX).
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned And_imm = 0;
+ if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
+ if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
+ std::swap(N0, N1);
+ }
+ if (!And_imm)
+ continue;
+
+ // Check if the AND mask is an immediate of the form: 000.....1111111100
+ unsigned TZ = countTrailingZeros(And_imm);
+ if (TZ != 1 && TZ != 2)
+ // Be conservative here. Shifter operands aren't always free. e.g. On
+ // Swift, left shifter operand of 1 / 2 for free but others are not.
+ // e.g.
+ // ubfx r3, r1, #16, #8
+ // ldr.w r3, [r0, r3, lsl #2]
+ // vs.
+ // mov.w r9, #1020
+ // and.w r2, r9, r1, lsr #14
+ // ldr r2, [r0, r2]
+ continue;
+ And_imm >>= TZ;
+ if (And_imm & (And_imm + 1))
+ continue;
+
+ // Look for (and (srl X, c1), c2).
+ SDValue Srl = N1.getOperand(0);
+ unsigned Srl_imm = 0;
+ if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
+ (Srl_imm <= 2))
+ continue;
+
+ // Make sure first operand is not a shifter operand which would prevent
+ // folding of the left shift.
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (isThumb2) {
+ if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
+ continue;
+ } else {
+ if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
+ SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
+ continue;
+ }
+
+ // Now make the transformation.
+ Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
+ Srl.getOperand(0),
+ CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
+ MVT::i32));
+ N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
+ Srl,
+ CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
+ N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
+ N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
+ CurDAG->UpdateNodeOperands(N, N0, N1);
+ }
+}
+
+/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
+/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
+/// least on current ARM implementations) which should be avoidded.
+bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
+ if (OptLevel == CodeGenOpt::None)
+ return true;
+
+ if (!CheckVMLxHazard)
+ return true;
+
+ if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() &&
+ !Subtarget->isCortexA9() && !Subtarget->isSwift())
+ return true;
+
+ if (!N->hasOneUse())
+ return false;
+
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg)
+ return true;
+ if (Use->isMachineOpcode()) {
+ const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
+ CurDAG->getSubtarget().getInstrInfo());
+
+ const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
+ if (MCID.mayStore())
+ return true;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return true;
+ // vmlx feeding into another vmlx. We actually want to unfold
+ // the use later in the MLxExpansion pass. e.g.
+ // vmla
+ // vmla (stall 8 cycles)
+ //
+ // vmul (5 cycles)
+ // vadd (5 cycles)
+ // vmla
+ // This adds up to about 18 - 19 cycles.
+ //
+ // vmla
+ // vmul (stall 4 cycles)
+ // vadd adds up to about 14 cycles.
+ return TII->isFpMLxInstruction(Opcode);
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal,
+ unsigned ShAmt) {
+ if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
+ return true;
+ if (Shift.hasOneUse())
+ return true;
+ // R << 2 is free.
+ return ShOpcVal == ARM_AM::lsl &&
+ (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
+}
+
+unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
+ if (Subtarget->isThumb()) {
+ if (Val <= 255) return 1; // MOV
+ if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+ if (~Val <= 255) return 2; // MOV + MVN
+ if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
+ if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
+ if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+ if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
+ }
+ if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
+ return 3; // Literal pool load
+}
+
+bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
+ unsigned MaxShift,
+ unsigned &PowerOfTwo,
+ SDValue &NewMulConst) const {
+ assert(N.getOpcode() == ISD::MUL);
+ assert(MaxShift > 0);
+
+ // If the multiply is used in more than one place then changing the constant
+ // will make other uses incorrect, so don't.
+ if (!N.hasOneUse()) return false;
+ // Check if the multiply is by a constant
+ ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!MulConst) return false;
+ // If the constant is used in more than one place then modifying it will mean
+ // we need to materialize two constants instead of one, which is a bad idea.
+ if (!MulConst->hasOneUse()) return false;
+ unsigned MulConstVal = MulConst->getZExtValue();
+ if (MulConstVal == 0) return false;
+
+ // Find the largest power of 2 that MulConstVal is a multiple of
+ PowerOfTwo = MaxShift;
+ while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
+ --PowerOfTwo;
+ if (PowerOfTwo == 0) return false;
+ }
+
+ // Only optimise if the new cost is better
+ unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
+ NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
+ unsigned OldCost = ConstantMaterializationCost(MulConstVal);
+ unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
+ return NewCost < OldCost;
+}
+
+void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
+ CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
+ CurDAG->ReplaceAllUsesWith(N, M);
+}
+
+bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
+ SDValue &BaseReg,
+ SDValue &Opc,
+ bool CheckProfitability) {
+ if (DisableShifterOp)
+ return false;
+
+ // If N is a multiply-by-constant and it's profitable to extract a shift and
+ // use it in a shifted operand do so.
+ if (N.getOpcode() == ISD::MUL) {
+ unsigned PowerOfTwo = 0;
+ SDValue NewMulConst;
+ if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
+ BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32,
+ N.getOperand(0), NewMulConst)
+ .getNode()),
+ 0);
+ replaceDAGValue(N.getOperand(1), NewMulConst);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
+ PowerOfTwo),
+ SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!RHS) return false;
+ ShImmVal = RHS->getZExtValue() & 31;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ SDLoc(N), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
+ SDValue &BaseReg,
+ SDValue &ShReg,
+ SDValue &Opc,
+ bool CheckProfitability) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (RHS) return false;
+
+ ShReg = N.getOperand(1);
+ if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+ return false;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ SDLoc(N), MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
+ SDValue &Base,
+ SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ Base = N.getOperand(0);
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getSExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+}
+
+
+
+bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ISD::ADD.
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave simple R +/- imm12 operands for LDRi12
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) // 12 bits.
+ return false;
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
+ ARM_AM::ShiftOpc ShOpcVal =
+ ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
+ N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ // If Offset is a multiply-by-constant and it's profitable to extract a shift
+ // and use it in a shifted operand do so.
+ if (Offset.getOpcode() == ISD::MUL) {
+ unsigned PowerOfTwo = 0;
+ SDValue NewMulConst;
+ if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
+ replaceDAGValue(Offset.getOperand(1), NewMulConst);
+ ShAmt = PowerOfTwo;
+ ShOpcVal = ARM_AM::lsl;
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ SDLoc(N), MVT::i32);
+ return true;
+}
+
+
+//-----
+
+AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
+ SDValue &Base,
+ SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ SDLoc(N), MVT::i32);
+ return AM2_SHOP;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ADD.
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ SDLoc(N), MVT::i32);
+ return AM2_BASE;
+ }
+
+ // Match simple R +/- imm12 operands.
+ if (N.getOpcode() != ISD::SUB) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) { // 12 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
+ ARM_AM::no_shift),
+ SDLoc(N), MVT::i32);
+ return AM2_BASE;
+ }
+ }
+
+ if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
+ // Compute R +/- (R << N) and reuse it.
+ Base = N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ SDLoc(N), MVT::i32);
+ return AM2_BASE;
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::ShiftOpc ShOpcVal =
+ ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
+ N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ SDLoc(N), MVT::i32);
+ return AM2_SHOP;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
+ return false;
+
+ Offset = N;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+ unsigned ShAmt = 0;
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+ Offset = N.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ SDLoc(N), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ if (AddSub == ARM_AM::sub) Val *= -1;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ SDLoc(Op), MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
+ SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::SUB) {
+ // X - C is canonicalize to X + -C, no need to handle it here.
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
+ MVT::i32);
+ return true;
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
+ MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -256 + 1, 256, RHSC)) { // 8 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
+ MVT::i32);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
+ MVT::i32);
+ return true;
+ }
+
+ Offset = N;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
+ SDValue &Base, SDValue &Offset) {
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
+ -256 + 1, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+ SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ Base = N;
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ SDLoc(N), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
+ SDValue &Align) {
+ Addr = N;
+
+ unsigned Alignment = 0;
+
+ MemSDNode *MemN = cast<MemSDNode>(Parent);
+
+ if (isa<LSBaseSDNode>(MemN) ||
+ ((MemN->getOpcode() == ARMISD::VST1_UPD ||
+ MemN->getOpcode() == ARMISD::VLD1_UPD) &&
+ MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
+ // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
+ // The maximum alignment is equal to the memory size being referenced.
+ unsigned MMOAlign = MemN->getAlignment();
+ unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
+ if (MMOAlign >= MemSize && MemSize > 1)
+ Alignment = MemSize;
+ } else {
+ // All other uses of addrmode6 are for intrinsics. For now just record
+ // the raw alignment value; it will be refined later based on the legal
+ // alignment operands for the intrinsic.
+ Alignment = MemN->getAlignment();
+ }
+
+ Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
+ SDValue &Offset) {
+ LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
+ ISD::MemIndexedMode AM = LdSt->getAddressingMode();
+ if (AM != ISD::POST_INC)
+ return false;
+ Offset = N;
+ if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
+ if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ }
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
+ SDValue &Offset, SDValue &Label) {
+ if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
+ Offset = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Addressing Modes
+//===----------------------------------------------------------------------===//
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
+ SDValue &Base, SDValue &Offset){
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
+ ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
+ if (!NC || !NC->isNullValue())
+ return false;
+
+ Base = Offset = N;
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
+ SDValue &Base, SDValue &OffImm) {
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::ADD) {
+ return false; // We want to select register offset instead
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ Base = N.getOperand(0);
+ } else {
+ Base = N;
+ }
+
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ // If the RHS is + imm5 * scale, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ // Offset is too large, so use register offset instead.
+ return false;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ // Only multiples of 4 are allowed for the offset, so the frame object
+ // alignment must be at least 4.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->getObjectAlignment(FI) < 4)
+ MFI->setObjectAlignment(FI, 4);
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
+ (LHSR && LHSR->getReg() == ARM::SP)) {
+ // If the RHS is + imm8 * scale, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ // For LHS+RHS to result in an offset that's a multiple of 4 the object
+ // indexed by the LHS must be 4-byte aligned.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->getObjectAlignment(FI) < 4)
+ MFI->setObjectAlignment(FI, 4);
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Thumb 2 Addressing Modes
+//===----------------------------------------------------------------------===//
+
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select t2LDRpci instead.
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ if (SelectT2AddrModeImm8(N, Base, OffImm))
+ // Let t2LDRi8 handle (R - imm8).
+ return false;
+
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ // Match simple R - imm8 operands.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getSExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm){
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ int RHSC;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
+ OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
+ ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
+ : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
+ SDValue &Base,
+ SDValue &OffReg, SDValue &ShImm) {
+ // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
+ return false;
+ else if (RHSC < 0 && RHSC >= -255) // 8 bits
+ return false;
+ }
+
+ // Look for (R + R) or (R + (R << [1,2,3])).
+ unsigned ShAmt = 0;
+ Base = N.getOperand(0);
+ OffReg = N.getOperand(1);
+
+ // Swap if it is ((R << c) + R).
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
+ if (ShOpcVal != ARM_AM::lsl) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
+ if (ShOpcVal == ARM_AM::lsl)
+ std::swap(Base, OffReg);
+ }
+
+ if (ShOpcVal == ARM_AM::lsl) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
+ OffReg = OffReg.getOperand(0);
+ else {
+ ShAmt = 0;
+ }
+ }
+ }
+
+ // If OffReg is a multiply-by-constant and it's profitable to extract a shift
+ // and use it in a shifted operand do so.
+ if (OffReg.getOpcode() == ISD::MUL) {
+ unsigned PowerOfTwo = 0;
+ SDValue NewMulConst;
+ if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
+ replaceDAGValue(OffReg.getOperand(1), NewMulConst);
+ ShAmt = PowerOfTwo;
+ }
+ }
+
+ ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
+
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ // This *must* succeed since it's used for the irreplaceable ldrex and strex
+ // instructions.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+
+ if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
+ return true;
+
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!RHS)
+ return true;
+
+ uint32_t RHSC = (int)RHS->getZExtValue();
+ if (RHSC > 1020 || RHSC % 4 != 0)
+ return true;
+
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+
+ OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
+ return true;
+}
+
+//===--------------------------------------------------------------------===//
+
+/// getAL - Returns a ARMCC::AL immediate node.
+static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) {
+ return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
+}
+
+SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return nullptr;
+
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Offset, AMOpc;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (LoadedVT == MVT::i32 && isPre &&
+ SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = ARM::LDR_PRE_IMM;
+ Match = true;
+ } else if (LoadedVT == MVT::i32 && !isPre &&
+ SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = ARM::LDR_POST_IMM;
+ Match = true;
+ } else if (LoadedVT == MVT::i32 &&
+ SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
+ Match = true;
+
+ } else if (LoadedVT == MVT::i16 &&
+ SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
+ ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
+ : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
+ } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
+ if (LD->getExtensionType() == ISD::SEXTLOAD) {
+ if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
+ }
+ } else {
+ if (isPre &&
+ SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = ARM::LDRB_PRE_IMM;
+ } else if (!isPre &&
+ SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = ARM::LDRB_POST_IMM;
+ } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
+ }
+ }
+ }
+
+ if (Match) {
+ if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
+ MVT::i32, MVT::Other, Ops);
+ } else {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
+ MVT::i32, MVT::Other, Ops);
+ }
+ }
+
+ return nullptr;
+}
+
+SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return nullptr;
+
+ EVT LoadedVT = LD->getMemoryVT();
+ bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
+ SDValue Offset;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ case MVT::i32:
+ Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
+ break;
+ case MVT::i16:
+ if (isSExtLd)
+ Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
+ else
+ Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
+ break;
+ case MVT::i8:
+ case MVT::i1:
+ if (isSExtLd)
+ Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
+ else
+ Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
+ break;
+ default:
+ return nullptr;
+ }
+ Match = true;
+ }
+
+ if (Match) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
+ MVT::Other, Ops);
+ }
+
+ return nullptr;
+}
+
+/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
+SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
+}
+
+/// \brief Form a D register from a pair of S registers.
+SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
+}
+
+/// \brief Form a quad register from a pair of D registers.
+SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
+ MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
+}
+
+/// \brief Form 4 consecutive D registers from a pair of Q registers.
+SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
+ MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
+}
+
+/// \brief Form 4 consecutive S registers.
+SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
+}
+
+/// \brief Form 4 consecutive D registers.
+SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
+ MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
+}
+
+/// \brief Form 4 consecutive Q registers.
+SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
+ MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
+}
+
+/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
+/// of a NEON VLD or VST instruction. The supported values depend on the
+/// number of registers being loaded.
+SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl,
+ unsigned NumVecs, bool is64BitVector) {
+ unsigned NumRegs = NumVecs;
+ if (!is64BitVector && NumVecs < 3)
+ NumRegs *= 2;
+
+ unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ if (Alignment >= 32 && NumRegs == 4)
+ Alignment = 32;
+ else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
+ Alignment = 16;
+ else if (Alignment >= 8)
+ Alignment = 8;
+ else
+ Alignment = 0;
+
+ return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
+}
+
+static bool isVLDfixed(unsigned Opc)
+{
+ switch (Opc) {
+ default: return false;
+ case ARM::VLD1d8wb_fixed : return true;
+ case ARM::VLD1d16wb_fixed : return true;
+ case ARM::VLD1d64Qwb_fixed : return true;
+ case ARM::VLD1d32wb_fixed : return true;
+ case ARM::VLD1d64wb_fixed : return true;
+ case ARM::VLD1d64TPseudoWB_fixed : return true;
+ case ARM::VLD1d64QPseudoWB_fixed : return true;
+ case ARM::VLD1q8wb_fixed : return true;
+ case ARM::VLD1q16wb_fixed : return true;
+ case ARM::VLD1q32wb_fixed : return true;
+ case ARM::VLD1q64wb_fixed : return true;
+ case ARM::VLD2d8wb_fixed : return true;
+ case ARM::VLD2d16wb_fixed : return true;
+ case ARM::VLD2d32wb_fixed : return true;
+ case ARM::VLD2q8PseudoWB_fixed : return true;
+ case ARM::VLD2q16PseudoWB_fixed : return true;
+ case ARM::VLD2q32PseudoWB_fixed : return true;
+ case ARM::VLD2DUPd8wb_fixed : return true;
+ case ARM::VLD2DUPd16wb_fixed : return true;
+ case ARM::VLD2DUPd32wb_fixed : return true;
+ }
+}
+
+static bool isVSTfixed(unsigned Opc)
+{
+ switch (Opc) {
+ default: return false;
+ case ARM::VST1d8wb_fixed : return true;
+ case ARM::VST1d16wb_fixed : return true;
+ case ARM::VST1d32wb_fixed : return true;
+ case ARM::VST1d64wb_fixed : return true;
+ case ARM::VST1q8wb_fixed : return true;
+ case ARM::VST1q16wb_fixed : return true;
+ case ARM::VST1q32wb_fixed : return true;
+ case ARM::VST1q64wb_fixed : return true;
+ case ARM::VST1d64TPseudoWB_fixed : return true;
+ case ARM::VST1d64QPseudoWB_fixed : return true;
+ case ARM::VST2d8wb_fixed : return true;
+ case ARM::VST2d16wb_fixed : return true;
+ case ARM::VST2d32wb_fixed : return true;
+ case ARM::VST2q8PseudoWB_fixed : return true;
+ case ARM::VST2q16PseudoWB_fixed : return true;
+ case ARM::VST2q32PseudoWB_fixed : return true;
+ }
+}
+
+// Get the register stride update opcode of a VLD/VST instruction that
+// is otherwise equivalent to the given fixed stride updating instruction.
+static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
+ assert((isVLDfixed(Opc) || isVSTfixed(Opc))
+ && "Incorrect fixed stride updating instruction.");
+ switch (Opc) {
+ default: break;
+ case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
+ case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
+ case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
+ case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
+ case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
+ case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
+ case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
+ case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
+ case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
+ case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
+ case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
+ case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
+
+ case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
+ case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
+ case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
+ case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
+ case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
+ case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
+ case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
+ case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
+ case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
+ case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
+
+ case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
+ case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
+ case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
+ case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
+ case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
+ case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
+
+ case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
+ case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
+ case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
+ case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
+ case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
+ case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
+
+ case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
+ case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
+ case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
+ }
+ return Opc; // If not one we handle, return it unchanged.
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ SDLoc dl(N);
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return nullptr;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2f64:
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
+ break;
+ }
+
+ EVT ResTy;
+ if (NumVecs == 1)
+ ResTy = VT;
+ else {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+ }
+ std::vector<EVT> ResTys;
+ ResTys.push_back(ResTy);
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDNode *VLd;
+ SmallVector<SDValue, 7> Ops;
+
+ // Double registers and VLD1/VLD2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
+ // case entirely when the rest are updated to that form, too.
+ if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
+ // check for that explicitly too. Horribly hacky, but temporary.
+ if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
+ !isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+ } else {
+ // Otherwise, quad registers are loaded with two separate instructions,
+ // where one loads the even registers and the other loads the odd registers.
+ EVT AddrTy = MemAddr.getValueType();
+
+ // Load the even subregs. This is always an updating load, so that it
+ // provides the address to the second load for the odd subregs.
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ ResTy, AddrTy, MVT::Other, OpsA);
+ Chain = SDValue(VLdA, 2);
+
+ // Load the odd subregs.
+ Ops.push_back(SDValue(VLdA, 1));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VLD3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(SDValue(VLdA, 0));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
+ }
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+
+ if (NumVecs == 1)
+ return VLd;
+
+ // Extract out the subregisters.
+ SDValue SuperReg = SDValue(VLd, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
+ return nullptr;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ SDLoc dl(N);
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return nullptr;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2f64:
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+ break;
+ }
+
+ std::vector<EVT> ResTys;
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SmallVector<SDValue, 7> Ops;
+
+ // Double registers and VST1/VST2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ SDValue SrcReg;
+ if (NumVecs == 1) {
+ SrcReg = N->getOperand(Vec0Idx);
+ } else if (is64BitVector) {
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2)
+ SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ // If it's a vst3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+ } else {
+ // Form a QQ register.
+ SDValue Q0 = N->getOperand(Vec0Idx);
+ SDValue Q1 = N->getOperand(Vec0Idx + 1);
+ SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
+ }
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
+ // case entirely when the rest are updated to that form, too.
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
+ // check for that explicitly too. Horribly hacky, but temporary.
+ if (!isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(Inc);
+ else if (NumVecs > 2 && !isVSTfixed(Opc))
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(SrcReg);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+ // Transfer memoperands.
+ cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+
+ return VSt;
+ }
+
+ // Otherwise, quad registers are stored with two separate instructions,
+ // where one stores the even registers and the other stores the odd registers.
+
+ // Form the QQQQ REG_SEQUENCE.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
+
+ // Store the even D registers. This is always an updating store, so that it
+ // provides the address to the second store for the odd subregs.
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
+ SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ MemAddr.getValueType(),
+ MVT::Other, OpsA);
+ cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd D registers.
+ Ops.push_back(SDValue(VStA, 0));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VST3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(RegSeq);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops);
+ cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
+ return VStB;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes) {
+ assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
+ SDLoc dl(N);
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return nullptr;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ unsigned Lane =
+ cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld/vst lane type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ // Quad-register operations:
+ case MVT::v8i16: OpcodeIndex = 0; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 1; break;
+ }
+
+ std::vector<EVT> ResTys;
+ if (IsLoad) {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
+ MVT::i64, ResTyElts));
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+
+ SDValue SuperReg;
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2) {
+ if (is64BitVector)
+ SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
+ else
+ SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
+ } else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ if (is64BitVector)
+ SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
+ else
+ SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
+ }
+ Ops.push_back(SuperReg);
+ Ops.push_back(getI32Imm(Lane, dl));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes[OpcodeIndex]);
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
+ if (!IsLoad)
+ return VLdLn;
+
+ // Extract the subregisters.
+ SuperReg = SDValue(VLdLn, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
+ return nullptr;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+ unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
+ SDLoc dl(N);
+
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
+ return nullptr;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld-dup type");
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ }
+
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
+ unsigned Opc = Opcodes[OpcodeIndex];
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ // fixed-stride update instructions don't have an explicit writeback
+ // operand. It's implicit in the opcode itself.
+ SDValue Inc = N->getOperand(2);
+ if (!isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(Inc);
+ // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
+ else if (NumVecs > 2)
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ std::vector<EVT> ResTys;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+ SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
+ SuperReg = SDValue(VLdDup, 0);
+
+ // Extract the subregisters.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ unsigned SubIdx = ARM::dsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
+ return nullptr;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
+ unsigned Opc) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ unsigned FirstTblReg = IsExt ? 2 : 1;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(FirstTblReg + 0);
+ SDValue V1 = N->getOperand(FirstTblReg + 1);
+ if (NumVecs == 2)
+ RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(FirstTblReg + 2);
+ // If it's a vtbl3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(FirstTblReg + 3);
+ RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ SmallVector<SDValue, 6> Ops;
+ if (IsExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
+ Ops.push_back(getAL(CurDAG, dl)); // predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops);
+}
+
+SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
+ bool isSigned) {
+ if (!Subtarget->hasV6T2Ops())
+ return nullptr;
+
+ unsigned Opc = isSigned
+ ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
+ : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
+ SDLoc dl(N);
+
+ // For unsigned extracts, check for a shift right and mask
+ unsigned And_imm = 0;
+ if (N->getOpcode() == ISD::AND) {
+ if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
+
+ // The immediate is a mask of the low bits iff imm & (imm+1) == 0
+ if (And_imm & (And_imm + 1))
+ return nullptr;
+
+ unsigned Srl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
+ Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+
+ // Note: The width operand is encoded as width-1.
+ unsigned Width = countTrailingOnes(And_imm) - 1;
+ unsigned LSB = Srl_imm;
+
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
+ // It's cheaper to use a right shift to extract the top bits.
+ if (Subtarget->isThumb()) {
+ Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, dl, MVT::i32),
+ getAL(CurDAG, dl), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ }
+
+ // ARM models shift instructions as MOVsi with shifter operand.
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
+ SDValue ShOpc =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
+ MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
+ getAL(CurDAG, dl), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
+ }
+
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, dl, MVT::i32),
+ CurDAG->getTargetConstant(Width, dl, MVT::i32),
+ getAL(CurDAG, dl), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ }
+ }
+ return nullptr;
+ }
+
+ // Otherwise, we're looking for a shift of a shift
+ unsigned Shl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
+ assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
+ unsigned Srl_imm = 0;
+ if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+ // Note: The width operand is encoded as width-1.
+ unsigned Width = 32 - Srl_imm - 1;
+ int LSB = Srl_imm - Shl_imm;
+ if (LSB < 0)
+ return nullptr;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, dl, MVT::i32),
+ CurDAG->getTargetConstant(Width, dl, MVT::i32),
+ getAL(CurDAG, dl), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ }
+ }
+
+ if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
+ unsigned LSB = 0;
+ if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
+ !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
+ return nullptr;
+
+ if (LSB + Width > 32)
+ return nullptr;
+
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, dl, MVT::i32),
+ CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
+ getAL(CurDAG, dl), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ }
+
+ return nullptr;
+}
+
+/// Target-specific DAG combining for ISD::XOR.
+/// Target-independent combining lowers SELECT_CC nodes of the form
+/// select_cc setg[ge] X, 0, X, -X
+/// select_cc setgt X, -1, X, -X
+/// select_cc setl[te] X, 0, -X, X
+/// select_cc setlt X, 1, -X, X
+/// which represent Integer ABS into:
+/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+/// ARM instruction selection detects the latter and matches it to
+/// ARM::ABS or ARM::t2ABS machine node.
+SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
+ SDValue XORSrc0 = N->getOperand(0);
+ SDValue XORSrc1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ if (Subtarget->isThumb1Only())
+ return nullptr;
+
+ if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
+ return nullptr;
+
+ SDValue ADDSrc0 = XORSrc0.getOperand(0);
+ SDValue ADDSrc1 = XORSrc0.getOperand(1);
+ SDValue SRASrc0 = XORSrc1.getOperand(0);
+ SDValue SRASrc1 = XORSrc1.getOperand(1);
+ ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
+ EVT XType = SRASrc0.getValueType();
+ unsigned Size = XType.getSizeInBits() - 1;
+
+ if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
+ XType.isInteger() && SRAConstant != nullptr &&
+ Size == SRAConstant->getZExtValue()) {
+ unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
+ return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
+ }
+
+ return nullptr;
+}
+
+SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ EVT VT = N->getValueType(0);
+ if (!VT.is128BitVector() || N->getNumOperands() != 2)
+ llvm_unreachable("unexpected CONCAT_VECTORS");
+ return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
+}
+
+SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
+ SDLoc dl(N);
+
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
+ return nullptr; // Already selected.
+ }
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::WRITE_REGISTER: {
+ SDNode *ResNode = SelectWriteRegister(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
+ case ISD::READ_REGISTER: {
+ SDNode *ResNode = SelectReadRegister(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
+ case ISD::INLINEASM: {
+ SDNode *ResNode = SelectInlineAsm(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
+ case ISD::XOR: {
+ // Select special operations if XOR node forms integer ABS pattern
+ SDNode *ResNode = SelectABSOp(N);
+ if (ResNode)
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::Constant: {
+ unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ // If we can't materialize the constant we need to use a literal pool
+ if (ConstantMaterializationCost(Val) > 2) {
+ SDValue CPIdx = CurDAG->getTargetConstantPool(
+ ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI->getPointerTy(CurDAG->getDataLayout()));
+
+ SDNode *ResNode;
+ if (Subtarget->isThumb()) {
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
+ ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
+ Ops);
+ } else {
+ SDValue Ops[] = {
+ CPIdx,
+ CurDAG->getTargetConstant(0, dl, MVT::i32),
+ getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getEntryNode()
+ };
+ ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
+ Ops);
+ }
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
+ return nullptr;
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::FrameIndex: {
+ // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ if (Subtarget->isThumb1Only()) {
+ // Set the alignment of the frame object to 4, to avoid having to generate
+ // more than one ADD
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->getObjectAlignment(FI) < 4)
+ MFI->setObjectAlignment(FI, 4);
+ return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, dl, MVT::i32));
+ } else {
+ unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
+ ARM::t2ADDri : ARM::ADDri);
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ }
+ }
+ case ISD::SRL:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+ break;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SRA:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
+ return I;
+ break;
+ case ISD::MUL:
+ if (Subtarget->isThumb1Only())
+ break;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned RHSV = C->getZExtValue();
+ if (!RHSV) break;
+ if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
+ unsigned ShImm = Log2_32(RHSV-1);
+ if (ShImm >= 32)
+ break;
+ SDValue V = N->getOperand(0);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
+ Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
+ }
+ }
+ if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
+ unsigned ShImm = Log2_32(RHSV+1);
+ if (ShImm >= 32)
+ break;
+ SDValue V = N->getOperand(0);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
+ Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
+ }
+ }
+ }
+ break;
+ case ISD::AND: {
+ // Check for unsigned bitfield extract
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+
+ // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
+ // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
+ // are entirely contributed by c2 and lower 16-bits are entirely contributed
+ // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
+ // Select it to: "movt x, ((c1 & 0xffff) >> 16)
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ break;
+ unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
+ ? ARM::t2MOVTi16
+ : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
+ if (!Opc)
+ break;
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (!N1C)
+ break;
+ if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
+ SDValue N2 = N0.getOperand(1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ if (!N2C)
+ break;
+ unsigned N1CVal = N1C->getZExtValue();
+ unsigned N2CVal = N2C->getZExtValue();
+ if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
+ (N1CVal & 0xffffU) == 0xffffU &&
+ (N2CVal & 0xffffU) == 0x0U) {
+ SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
+ dl, MVT::i32);
+ SDValue Ops[] = { N0.getOperand(0), Imm16,
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops);
+ }
+ }
+ break;
+ }
+ case ARMISD::VMOVRRD:
+ return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
+ N->getOperand(0), getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32));
+ case ISD::UMUL_LOHI: {
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::UMULL : ARM::UMULLv5,
+ dl, MVT::i32, MVT::i32, Ops);
+ }
+ }
+ case ISD::SMUL_LOHI: {
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::SMULL : ARM::SMULLv5,
+ dl, MVT::i32, MVT::i32, Ops);
+ }
+ }
+ case ARMISD::UMLAL:{
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32)};
+ return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
+ }else{
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::UMLAL : ARM::UMLALv5,
+ dl, MVT::i32, MVT::i32, Ops);
+ }
+ }
+ case ARMISD::SMLAL:{
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32)};
+ return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
+ }else{
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::SMLAL : ARM::SMLALv5,
+ dl, MVT::i32, MVT::i32, Ops);
+ }
+ }
+ case ISD::LOAD: {
+ SDNode *ResNode = nullptr;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
+ ResNode = SelectT2IndexedLoad(N);
+ else
+ ResNode = SelectARMIndexedLoad(N);
+ if (ResNode)
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ }
+ case ARMISD::BRCOND: {
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ unsigned Opc = Subtarget->isThumb() ?
+ ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
+ assert(N1.getOpcode() == ISD::BasicBlock);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()), dl,
+ MVT::i32);
+ SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
+ MVT::Glue, Ops);
+ Chain = SDValue(ResNode, 0);
+ if (N->getNumValues() == 2) {
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(N, 1), InFlag);
+ }
+ ReplaceUses(SDValue(N, 0),
+ SDValue(Chain.getNode(), Chain.getResNo()));
+ return nullptr;
+ }
+ case ARMISD::VZIP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return nullptr;
+ case MVT::v8i8: Opc = ARM::VZIPd8; break;
+ case MVT::v4i16: Opc = ARM::VZIPd16; break;
+ case MVT::v2f32:
+ // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
+ case MVT::v16i8: Opc = ARM::VZIPq8; break;
+ case MVT::v8i16: Opc = ARM::VZIPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VZIPq32; break;
+ }
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
+ }
+ case ARMISD::VUZP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return nullptr;
+ case MVT::v8i8: Opc = ARM::VUZPd8; break;
+ case MVT::v4i16: Opc = ARM::VUZPd16; break;
+ case MVT::v2f32:
+ // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
+ case MVT::v16i8: Opc = ARM::VUZPq8; break;
+ case MVT::v8i16: Opc = ARM::VUZPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VUZPq32; break;
+ }
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
+ }
+ case ARMISD::VTRN: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return nullptr;
+ case MVT::v8i8: Opc = ARM::VTRNd8; break;
+ case MVT::v4i16: Opc = ARM::VTRNd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
+ case MVT::v16i8: Opc = ARM::VTRNq8; break;
+ case MVT::v8i16: Opc = ARM::VTRNq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VTRNq32; break;
+ }
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
+ }
+ case ARMISD::BUILD_VECTOR: {
+ EVT VecVT = N->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (EltVT == MVT::f64) {
+ assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
+ return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
+ }
+ assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
+ if (NumElts == 2)
+ return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
+ assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
+ return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ }
+
+ case ARMISD::VLD2DUP: {
+ static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+ ARM::VLD2DUPd32 };
+ return SelectVLDDup(N, false, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP: {
+ static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
+ ARM::VLD3DUPd16Pseudo,
+ ARM::VLD3DUPd32Pseudo };
+ return SelectVLDDup(N, false, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP: {
+ static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
+ ARM::VLD4DUPd16Pseudo,
+ ARM::VLD4DUPd32Pseudo };
+ return SelectVLDDup(N, false, 4, Opcodes);
+ }
+
+ case ARMISD::VLD2DUP_UPD: {
+ static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
+ ARM::VLD2DUPd16wb_fixed,
+ ARM::VLD2DUPd32wb_fixed };
+ return SelectVLDDup(N, true, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP_UPD: {
+ static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
+ ARM::VLD3DUPd16Pseudo_UPD,
+ ARM::VLD3DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP_UPD: {
+ static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
+ ARM::VLD4DUPd16Pseudo_UPD,
+ ARM::VLD4DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 4, Opcodes);
+ }
+
+ case ARMISD::VLD1_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
+ ARM::VLD1d16wb_fixed,
+ ARM::VLD1d32wb_fixed,
+ ARM::VLD1d64wb_fixed };
+ static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
+ ARM::VLD1q16wb_fixed,
+ ARM::VLD1q32wb_fixed,
+ ARM::VLD1q64wb_fixed };
+ return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
+ }
+
+ case ARMISD::VLD2_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
+ ARM::VLD2d16wb_fixed,
+ ARM::VLD2d32wb_fixed,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed };
+ return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ }
+
+ case ARMISD::VLD3_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
+ ARM::VLD3d16Pseudo_UPD,
+ ARM::VLD3d32Pseudo_UPD,
+ ARM::VLD1d64TPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
+ return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD4_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
+ ARM::VLD4d16Pseudo_UPD,
+ ARM::VLD4d32Pseudo_UPD,
+ ARM::VLD1d64QPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
+ return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD2LN_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
+ ARM::VLD2LNd16Pseudo_UPD,
+ ARM::VLD2LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
+ ARM::VLD2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD3LN_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
+ ARM::VLD3LNd16Pseudo_UPD,
+ ARM::VLD3LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
+ ARM::VLD3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD4LN_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
+ ARM::VLD4LNd16Pseudo_UPD,
+ ARM::VLD4LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
+ ARM::VLD4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST1_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
+ ARM::VST1d16wb_fixed,
+ ARM::VST1d32wb_fixed,
+ ARM::VST1d64wb_fixed };
+ static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
+ ARM::VST1q16wb_fixed,
+ ARM::VST1q32wb_fixed,
+ ARM::VST1q64wb_fixed };
+ return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
+ }
+
+ case ARMISD::VST2_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
+ ARM::VST2d16wb_fixed,
+ ARM::VST2d32wb_fixed,
+ ARM::VST1q64wb_fixed};
+ static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed };
+ return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ }
+
+ case ARMISD::VST3_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
+ ARM::VST3d16Pseudo_UPD,
+ ARM::VST3d32Pseudo_UPD,
+ ARM::VST1d64TPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
+ return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST4_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
+ ARM::VST4d16Pseudo_UPD,
+ ARM::VST4d32Pseudo_UPD,
+ ARM::VST1d64QPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
+ return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST2LN_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
+ ARM::VST2LNd16Pseudo_UPD,
+ ARM::VST2LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
+ ARM::VST2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST3LN_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
+ ARM::VST3LNd16Pseudo_UPD,
+ ARM::VST3LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
+ ARM::VST3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST4LN_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
+ ARM::VST4LNd16Pseudo_UPD,
+ ARM::VST4LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
+ ARM::VST4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_ldaexd:
+ case Intrinsic::arm_ldrexd: {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue MemAddr = N->getOperand(2);
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+
+ bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
+ unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
+ : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
+
+ // arm_ldrexd returns a i64 value in {i32, i32}
+ std::vector<EVT> ResTys;
+ if (isThumb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ } else
+ ResTys.push_back(MVT::Untyped);
+ ResTys.push_back(MVT::Other);
+
+ // Place arguments in the right order.
+ SmallVector<SDValue, 7> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(getAL(CurDAG, dl));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(Chain);
+ SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+
+ // Remap uses.
+ SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
+ if (!SDValue(N, 0).use_empty()) {
+ SDValue Result;
+ if (isThumb)
+ Result = SDValue(Ld, 0);
+ else {
+ SDValue SubRegIdx =
+ CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
+ SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
+ Result = SDValue(ResNode,0);
+ }
+ ReplaceUses(SDValue(N, 0), Result);
+ }
+ if (!SDValue(N, 1).use_empty()) {
+ SDValue Result;
+ if (isThumb)
+ Result = SDValue(Ld, 1);
+ else {
+ SDValue SubRegIdx =
+ CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
+ SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
+ Result = SDValue(ResNode,0);
+ }
+ ReplaceUses(SDValue(N, 1), Result);
+ }
+ ReplaceUses(SDValue(N, 2), OutChain);
+ return nullptr;
+ }
+ case Intrinsic::arm_stlexd:
+ case Intrinsic::arm_strexd: {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Val0 = N->getOperand(2);
+ SDValue Val1 = N->getOperand(3);
+ SDValue MemAddr = N->getOperand(4);
+
+ // Store exclusive double return a i32 value which is the return status
+ // of the issued store.
+ const EVT ResTys[] = {MVT::i32, MVT::Other};
+
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+ // Place arguments in the right order.
+ SmallVector<SDValue, 7> Ops;
+ if (isThumb) {
+ Ops.push_back(Val0);
+ Ops.push_back(Val1);
+ } else
+ // arm_strexd uses GPRPair.
+ Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
+ Ops.push_back(MemAddr);
+ Ops.push_back(getAL(CurDAG, dl));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(Chain);
+
+ bool IsRelease = IntNo == Intrinsic::arm_stlexd;
+ unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
+ : (IsRelease ? ARM::STLEXD : ARM::STREXD);
+
+ SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+
+ return St;
+ }
+
+ case Intrinsic::arm_neon_vld1: {
+ static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
+ ARM::VLD1q32, ARM::VLD1q64};
+ return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
+ }
+
+ case Intrinsic::arm_neon_vld2: {
+ static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
+ ARM::VLD2d32, ARM::VLD1q64 };
+ static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
+ return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
+ }
+
+ case Intrinsic::arm_neon_vld3: {
+ static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
+ ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo,
+ ARM::VLD1d64TPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
+ ARM::VLD3q16oddPseudo,
+ ARM::VLD3q32oddPseudo };
+ return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld4: {
+ static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
+ ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo,
+ ARM::VLD1d64QPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
+ ARM::VLD4q16oddPseudo,
+ ARM::VLD4q32oddPseudo };
+ return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld2lane: {
+ static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
+ ARM::VLD2LNd16Pseudo,
+ ARM::VLD2LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
+ ARM::VLD2LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vld3lane: {
+ static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
+ ARM::VLD3LNd16Pseudo,
+ ARM::VLD3LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
+ ARM::VLD3LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vld4lane: {
+ static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
+ ARM::VLD4LNd16Pseudo,
+ ARM::VLD4LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
+ ARM::VLD4LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst1: {
+ static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+ ARM::VST1d32, ARM::VST1d64 };
+ static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
+ ARM::VST1q32, ARM::VST1q64 };
+ return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
+ }
+
+ case Intrinsic::arm_neon_vst2: {
+ static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
+ ARM::VST2d32, ARM::VST1q64 };
+ static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
+ return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
+ }
+
+ case Intrinsic::arm_neon_vst3: {
+ static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
+ ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo,
+ ARM::VST1d64TPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
+ ARM::VST3q16oddPseudo,
+ ARM::VST3q32oddPseudo };
+ return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst4: {
+ static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
+ ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo,
+ ARM::VST1d64QPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
+ ARM::VST4q16oddPseudo,
+ ARM::VST4q32oddPseudo };
+ return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst2lane: {
+ static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
+ ARM::VST2LNd16Pseudo,
+ ARM::VST2LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
+ ARM::VST2LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst3lane: {
+ static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
+ ARM::VST3LNd16Pseudo,
+ ARM::VST3LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
+ ARM::VST3LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst4lane: {
+ static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
+ ARM::VST4LNd16Pseudo,
+ ARM::VST4LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
+ ARM::VST4LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
+ }
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_neon_vtbl2:
+ return SelectVTBL(N, false, 2, ARM::VTBL2);
+ case Intrinsic::arm_neon_vtbl3:
+ return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
+ case Intrinsic::arm_neon_vtbl4:
+ return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
+
+ case Intrinsic::arm_neon_vtbx2:
+ return SelectVTBL(N, true, 2, ARM::VTBX2);
+ case Intrinsic::arm_neon_vtbx3:
+ return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
+ case Intrinsic::arm_neon_vtbx4:
+ return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
+ }
+ break;
+ }
+
+ case ARMISD::VTBL1: {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 6> Ops;
+
+ Ops.push_back(N->getOperand(0));
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(getAL(CurDAG, dl)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
+ }
+ case ARMISD::VTBL2: {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = N->getOperand(1);
+ SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(getAL(CurDAG, dl)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
+ }
+
+ case ISD::CONCAT_VECTORS:
+ return SelectConcatVector(N);
+ }
+
+ return SelectCode(N);
+}
+
+// Inspect a register string of the form
+// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
+// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
+// and obtain the integer operands from them, adding these operands to the
+// provided vector.
+static void getIntOperandsFromRegisterString(StringRef RegString,
+ SelectionDAG *CurDAG, SDLoc DL,
+ std::vector<SDValue>& Ops) {
+ SmallVector<StringRef, 5> Fields;
+ RegString.split(Fields, ':');
+
+ if (Fields.size() > 1) {
+ bool AllIntFields = true;
+
+ for (StringRef Field : Fields) {
+ // Need to trim out leading 'cp' characters and get the integer field.
+ unsigned IntField;
+ AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
+ Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
+ }
+
+ assert(AllIntFields &&
+ "Unexpected non-integer value in special register string.");
+ }
+}
+
+// Maps a Banked Register string to its mask value. The mask value returned is
+// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
+// mask operand, which expresses which register is to be used, e.g. r8, and in
+// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
+// was invalid.
+static inline int getBankedRegisterMask(StringRef RegString) {
+ return StringSwitch<int>(RegString.lower())
+ .Case("r8_usr", 0x00)
+ .Case("r9_usr", 0x01)
+ .Case("r10_usr", 0x02)
+ .Case("r11_usr", 0x03)
+ .Case("r12_usr", 0x04)
+ .Case("sp_usr", 0x05)
+ .Case("lr_usr", 0x06)
+ .Case("r8_fiq", 0x08)
+ .Case("r9_fiq", 0x09)
+ .Case("r10_fiq", 0x0a)
+ .Case("r11_fiq", 0x0b)
+ .Case("r12_fiq", 0x0c)
+ .Case("sp_fiq", 0x0d)
+ .Case("lr_fiq", 0x0e)
+ .Case("lr_irq", 0x10)
+ .Case("sp_irq", 0x11)
+ .Case("lr_svc", 0x12)
+ .Case("sp_svc", 0x13)
+ .Case("lr_abt", 0x14)
+ .Case("sp_abt", 0x15)
+ .Case("lr_und", 0x16)
+ .Case("sp_und", 0x17)
+ .Case("lr_mon", 0x1c)
+ .Case("sp_mon", 0x1d)
+ .Case("elr_hyp", 0x1e)
+ .Case("sp_hyp", 0x1f)
+ .Case("spsr_fiq", 0x2e)
+ .Case("spsr_irq", 0x30)
+ .Case("spsr_svc", 0x32)
+ .Case("spsr_abt", 0x34)
+ .Case("spsr_und", 0x36)
+ .Case("spsr_mon", 0x3c)
+ .Case("spsr_hyp", 0x3e)
+ .Default(-1);
+}
+
+// Maps a MClass special register string to its value for use in the
+// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
+// Returns -1 to signify that the string was invalid.
+static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
+ return StringSwitch<int>(RegString.lower())
+ .Case("apsr", 0x0)
+ .Case("iapsr", 0x1)
+ .Case("eapsr", 0x2)
+ .Case("xpsr", 0x3)
+ .Case("ipsr", 0x5)
+ .Case("epsr", 0x6)
+ .Case("iepsr", 0x7)
+ .Case("msp", 0x8)
+ .Case("psp", 0x9)
+ .Case("primask", 0x10)
+ .Case("basepri", 0x11)
+ .Case("basepri_max", 0x12)
+ .Case("faultmask", 0x13)
+ .Case("control", 0x14)
+ .Default(-1);
+}
+
+// The flags here are common to those allowed for apsr in the A class cores and
+// those allowed for the special registers in the M class cores. Returns a
+// value representing which flags were present, -1 if invalid.
+static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
+ if (Flags.empty())
+ return 0x2 | (int)hasDSP;
+
+ return StringSwitch<int>(Flags)
+ .Case("g", 0x1)
+ .Case("nzcvq", 0x2)
+ .Case("nzcvqg", 0x3)
+ .Default(-1);
+}
+
+static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
+ const ARMSubtarget *Subtarget) {
+ // Ensure that the register (without flags) was a valid M Class special
+ // register.
+ int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
+ if (SYSmvalue == -1)
+ return -1;
+
+ // basepri, basepri_max and faultmask are only valid for V7m.
+ if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
+ return -1;
+
+ // If it was a read then we won't be expecting flags and so at this point
+ // we can return the mask.
+ if (IsRead) {
+ assert (Flags.empty() && "Unexpected flags for reading M class register.");
+ return SYSmvalue;
+ }
+
+ // We know we are now handling a write so need to get the mask for the flags.
+ int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
+
+ // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
+ // shouldn't have flags present.
+ if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
+ return -1;
+
+ // The _g and _nzcvqg versions are only valid if the DSP extension is
+ // available.
+ if (!Subtarget->hasDSP() && (Mask & 0x1))
+ return -1;
+
+ // The register was valid so need to put the mask in the correct place
+ // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
+ // construct the operand for the instruction node.
+ if (SYSmvalue < 0x4)
+ return SYSmvalue | Mask << 10;
+
+ return SYSmvalue;
+}
+
+static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
+ // The mask operand contains the special register (R Bit) in bit 4, whether
+ // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
+ // bits 3-0 contains the fields to be accessed in the special register, set by
+ // the flags provided with the register.
+ int Mask = 0;
+ if (Reg == "apsr") {
+ // The flags permitted for apsr are the same flags that are allowed in
+ // M class registers. We get the flag value and then shift the flags into
+ // the correct place to combine with the mask.
+ Mask = getMClassFlagsMask(Flags, true);
+ if (Mask == -1)
+ return -1;
+ return Mask << 2;
+ }
+
+ if (Reg != "cpsr" && Reg != "spsr") {
+ return -1;
+ }
+
+ // This is the same as if the flags were "fc"
+ if (Flags.empty() || Flags == "all")
+ return Mask | 0x9;
+
+ // Inspect the supplied flags string and set the bits in the mask for
+ // the relevant and valid flags allowed for cpsr and spsr.
+ for (char Flag : Flags) {
+ int FlagVal;
+ switch (Flag) {
+ case 'c':
+ FlagVal = 0x1;
+ break;
+ case 'x':
+ FlagVal = 0x2;
+ break;
+ case 's':
+ FlagVal = 0x4;
+ break;
+ case 'f':
+ FlagVal = 0x8;
+ break;
+ default:
+ FlagVal = 0;
+ }
+
+ // This avoids allowing strings where the same flag bit appears twice.
+ if (!FlagVal || (Mask & FlagVal))
+ return -1;
+ Mask |= FlagVal;
+ }
+
+ // If the register is spsr then we need to set the R bit.
+ if (Reg == "spsr")
+ Mask |= 0x10;
+
+ return Mask;
+}
+
+// Lower the read_register intrinsic to ARM specific DAG nodes
+// using the supplied metadata string to select the instruction node to use
+// and the registers/masks to construct as operands for the node.
+SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
+ const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
+ const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ bool IsThumb2 = Subtarget->isThumb2();
+ SDLoc DL(N);
+
+ std::vector<SDValue> Ops;
+ getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
+
+ if (!Ops.empty()) {
+ // If the special register string was constructed of fields (as defined
+ // in the ACLE) then need to lower to MRC node (32 bit) or
+ // MRRC node(64 bit), we can make the distinction based on the number of
+ // operands we have.
+ unsigned Opcode;
+ SmallVector<EVT, 3> ResTypes;
+ if (Ops.size() == 5){
+ Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
+ ResTypes.append({ MVT::i32, MVT::Other });
+ } else {
+ assert(Ops.size() == 3 &&
+ "Invalid number of fields in special register string.");
+ Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
+ ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
+ }
+
+ Ops.push_back(getAL(CurDAG, DL));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(N->getOperand(0));
+ return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
+ }
+
+ std::string SpecialReg = RegString->getString().lower();
+
+ int BankedReg = getBankedRegisterMask(SpecialReg);
+ if (BankedReg != -1) {
+ Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
+ DL, MVT::i32, MVT::Other, Ops);
+ }
+
+ // The VFP registers are read by creating SelectionDAG nodes with opcodes
+ // corresponding to the register that is being read from. So we switch on the
+ // string to find which opcode we need to use.
+ unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
+ .Case("fpscr", ARM::VMRS)
+ .Case("fpexc", ARM::VMRS_FPEXC)
+ .Case("fpsid", ARM::VMRS_FPSID)
+ .Case("mvfr0", ARM::VMRS_MVFR0)
+ .Case("mvfr1", ARM::VMRS_MVFR1)
+ .Case("mvfr2", ARM::VMRS_MVFR2)
+ .Case("fpinst", ARM::VMRS_FPINST)
+ .Case("fpinst2", ARM::VMRS_FPINST2)
+ .Default(0);
+
+ // If an opcode was found then we can lower the read to a VFP instruction.
+ if (Opcode) {
+ if (!Subtarget->hasVFP2())
+ return nullptr;
+ if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
+ return nullptr;
+
+ Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
+ }
+
+ // If the target is M Class then need to validate that the register string
+ // is an acceptable value, so check that a mask can be constructed from the
+ // string.
+ if (Subtarget->isMClass()) {
+ int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
+ if (SYSmValue == -1)
+ return nullptr;
+
+ SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
+ }
+
+ // Here we know the target is not M Class so we need to check if it is one
+ // of the remaining possible values which are apsr, cpsr or spsr.
+ if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
+ Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
+ MVT::i32, MVT::Other, Ops);
+ }
+
+ if (SpecialReg == "spsr") {
+ Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
+ DL, MVT::i32, MVT::Other, Ops);
+ }
+
+ return nullptr;
+}
+
+// Lower the write_register intrinsic to ARM specific DAG nodes
+// using the supplied metadata string to select the instruction node to use
+// and the registers/masks to use in the nodes
+SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
+ const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
+ const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ bool IsThumb2 = Subtarget->isThumb2();
+ SDLoc DL(N);
+
+ std::vector<SDValue> Ops;
+ getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
+
+ if (!Ops.empty()) {
+ // If the special register string was constructed of fields (as defined
+ // in the ACLE) then need to lower to MCR node (32 bit) or
+ // MCRR node(64 bit), we can make the distinction based on the number of
+ // operands we have.
+ unsigned Opcode;
+ if (Ops.size() == 5) {
+ Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
+ Ops.insert(Ops.begin()+2, N->getOperand(2));
+ } else {
+ assert(Ops.size() == 3 &&
+ "Invalid number of fields in special register string.");
+ Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
+ SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
+ Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
+ }
+
+ Ops.push_back(getAL(CurDAG, DL));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(N->getOperand(0));
+
+ return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+ }
+
+ std::string SpecialReg = RegString->getString().lower();
+ int BankedReg = getBankedRegisterMask(SpecialReg);
+ if (BankedReg != -1) {
+ Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
+ DL, MVT::Other, Ops);
+ }
+
+ // The VFP registers are written to by creating SelectionDAG nodes with
+ // opcodes corresponding to the register that is being written. So we switch
+ // on the string to find which opcode we need to use.
+ unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
+ .Case("fpscr", ARM::VMSR)
+ .Case("fpexc", ARM::VMSR_FPEXC)
+ .Case("fpsid", ARM::VMSR_FPSID)
+ .Case("fpinst", ARM::VMSR_FPINST)
+ .Case("fpinst2", ARM::VMSR_FPINST2)
+ .Default(0);
+
+ if (Opcode) {
+ if (!Subtarget->hasVFP2())
+ return nullptr;
+ Ops = { N->getOperand(2), getAL(CurDAG, DL),
+ CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
+ return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+ }
+
+ SmallVector<StringRef, 5> Fields;
+ StringRef(SpecialReg).split(Fields, '_', 1, false);
+ std::string Reg = Fields[0].str();
+ StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
+
+ // If the target was M Class then need to validate the special register value
+ // and retrieve the mask for use in the instruction node.
+ if (Subtarget->isMClass()) {
+ // basepri_max gets split so need to correct Reg and Flags.
+ if (SpecialReg == "basepri_max") {
+ Reg = SpecialReg;
+ Flags = "";
+ }
+ int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
+ if (SYSmValue == -1)
+ return nullptr;
+
+ SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
+ N->getOperand(2), getAL(CurDAG, DL),
+ CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
+ return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
+ }
+
+ // We then check to see if a valid mask can be constructed for one of the
+ // register string values permitted for the A and R class cores. These values
+ // are apsr, spsr and cpsr; these are also valid on older cores.
+ int Mask = getARClassRegisterMask(Reg, Flags);
+ if (Mask != -1) {
+ Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+ N->getOperand(0) };
+ return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
+ DL, MVT::Other, Ops);
+ }
+
+ return nullptr;
+}
+
+SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
+ std::vector<SDValue> AsmNodeOperands;
+ unsigned Flag, Kind;
+ bool Changed = false;
+ unsigned NumOps = N->getNumOperands();
+
+ // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
+ // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
+ // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
+ // respectively. Since there is no constraint to explicitly specify a
+ // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
+ // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
+ // them into a GPRPair.
+
+ SDLoc dl(N);
+ SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
+ : SDValue(nullptr,0);
+
+ SmallVector<bool, 8> OpChanged;
+ // Glue node will be appended late.
+ for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
+ SDValue op = N->getOperand(i);
+ AsmNodeOperands.push_back(op);
+
+ if (i < InlineAsm::Op_FirstOperand)
+ continue;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
+ Flag = C->getZExtValue();
+ Kind = InlineAsm::getKind(Flag);
+ }
+ else
+ continue;
+
+ // Immediate operands to inline asm in the SelectionDAG are modeled with
+ // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
+ // the second is a constant with the value of the immediate. If we get here
+ // and we have a Kind_Imm, skip the next operand, and continue.
+ if (Kind == InlineAsm::Kind_Imm) {
+ SDValue op = N->getOperand(++i);
+ AsmNodeOperands.push_back(op);
+ continue;
+ }
+
+ unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
+ if (NumRegs)
+ OpChanged.push_back(false);
+
+ unsigned DefIdx = 0;
+ bool IsTiedToChangedOp = false;
+ // If it's a use that is tied with a previous def, it has no
+ // reg class constraint.
+ if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
+ IsTiedToChangedOp = OpChanged[DefIdx];
+
+ if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
+ && Kind != InlineAsm::Kind_RegDefEarlyClobber)
+ continue;
+
+ unsigned RC;
+ bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
+ if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
+ || NumRegs != 2)
+ continue;
+
+ assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
+ SDValue V0 = N->getOperand(i+1);
+ SDValue V1 = N->getOperand(i+2);
+ unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
+ unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
+ SDValue PairedReg;
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ if (Kind == InlineAsm::Kind_RegDef ||
+ Kind == InlineAsm::Kind_RegDefEarlyClobber) {
+ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
+ // the original GPRs.
+
+ unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
+ SDValue Chain = SDValue(N,0);
+
+ SDNode *GU = N->getGluedUser();
+ SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
+ Chain.getValue(1));
+
+ // Extract values from a GPRPair reg and copy to the original GPR reg.
+ SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
+ RegCopy);
+ SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
+ RegCopy);
+ SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
+ RegCopy.getValue(1));
+ SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
+
+ // Update the original glue user.
+ std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
+ Ops.push_back(T1.getValue(1));
+ CurDAG->UpdateNodeOperands(GU, Ops);
+ }
+ else {
+ // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
+ // GPRPair and then pass the GPRPair to the inline asm.
+ SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
+
+ // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
+ SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
+ Chain.getValue(1));
+ SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
+ T0.getValue(1));
+ SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
+
+ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
+ // i32 VRs of inline asm with it.
+ unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
+ Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
+
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ Glue = Chain.getValue(1);
+ }
+
+ Changed = true;
+
+ if(PairedReg.getNode()) {
+ OpChanged[OpChanged.size() -1 ] = true;
+ Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
+ if (IsTiedToChangedOp)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
+ else
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
+ // Replace the current flag.
+ AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
+ Flag, dl, MVT::i32);
+ // Add the new register node and skip the original two GPRs.
+ AsmNodeOperands.push_back(PairedReg);
+ // Skip the next two GPRs.
+ i += 2;
+ }
+ }
+
+ if (Glue.getNode())
+ AsmNodeOperands.push_back(Glue);
+ if (!Changed)
+ return nullptr;
+
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
+ CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
+
+bool ARMDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) {
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ // FIXME: It seems strange that 'i' is needed here since it's supposed to
+ // be an immediate and not a memory constraint.
+ // Fallthrough.
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_o:
+ case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_Um:
+ case InlineAsm::Constraint_Un:
+ case InlineAsm::Constraint_Uq:
+ case InlineAsm::Constraint_Us:
+ case InlineAsm::Constraint_Ut:
+ case InlineAsm::Constraint_Uv:
+ case InlineAsm::Constraint_Uy:
+ // Require the address to be in a register. That is safe for all ARM
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+ }
+ return true;
+}
+
+/// createARMISelDag - This pass converts a legalized DAG into a
+/// ARM-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new ARMDAGToDAGISel(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
new file mode 100644
index 0000000..9cfb06b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -0,0 +1,12326 @@
+//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMISelLowering.h"
+#include "ARMCallingConv.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMPerfectShuffle.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+#include <utility>
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-isel"
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
+STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
+
+static cl::opt<bool>
+ARMInterworking("arm-interworking", cl::Hidden,
+ cl::desc("Enable / disable ARM interworking (for debugging only)"),
+ cl::init(true));
+
+namespace {
+ class ARMCCState : public CCState {
+ public:
+ ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
+ ParmContext PC)
+ : CCState(CC, isVarArg, MF, locs, C) {
+ assert(((PC == Call) || (PC == Prologue)) &&
+ "ARMCCState users must specify whether their context is call"
+ "or prologue generation.");
+ CallOrPrologue = PC;
+ }
+ };
+}
+
+// The APCS parameter registers.
+static const MCPhysReg GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+};
+
+void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
+ MVT PromotedBitwiseVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
+
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
+ }
+
+ MVT ElemTy = VT.getVectorElementType();
+ if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ if (ElemTy == MVT::i32) {
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ }
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+ if (VT.isInteger()) {
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+ }
+
+ // Promote all bit-wise operations.
+ if (VT.isInteger() && VT != PromotedBitwiseVT) {
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
+ }
+
+ // Neon does not support vector divide/remainder operations.
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+
+ if (!VT.isFloatingPoint() &&
+ VT != MVT::v2i64 && VT != MVT::v1i64)
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ setOperationAction(Opcode, VT, Legal);
+}
+
+void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
+ addRegisterClass(VT, &ARM::DPRRegClass);
+ addTypeForNEON(VT, MVT::f64, MVT::v2i32);
+}
+
+void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
+ addRegisterClass(VT, &ARM::DPairRegClass);
+ addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
+}
+
+ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
+ const ARMSubtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
+ RegInfo = Subtarget->getRegisterInfo();
+ Itins = Subtarget->getInstrItineraryData();
+
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ if (Subtarget->isTargetMachO()) {
+ // Uses VFP for Thumb libfuncs if available.
+ if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
+ Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const ISD::CondCode Cond;
+ } LibraryCalls[] = {
+ // Single-precision floating-point arithmetic.
+ { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
+
+ // Double-precision floating-point arithmetic.
+ { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
+
+ // Single-precision comparisons.
+ { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
+ { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
+ { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
+ { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
+ { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
+ { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
+ { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
+ { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
+
+ // Double-precision comparisons.
+ { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
+ { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
+ { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
+ { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
+ { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
+ { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
+ { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
+ { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
+
+ // Conversions between floating types.
+ { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
+ { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ if (LC.Cond != ISD::SETCC_INVALID)
+ setCmpLibcallCC(LC.Op, LC.Cond);
+ }
+ }
+
+ // Set the correct calling convention for ARMv7k WatchOS. It's just
+ // AAPCS_VFP for functions as simple as libcalls.
+ if (Subtarget->isTargetWatchOS()) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
+ setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
+ }
+ }
+
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+
+ // RTLIB
+ if (Subtarget->isAAPCS_ABI() &&
+ (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
+ Subtarget->isTargetAndroid())) {
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ const ISD::CondCode Cond;
+ } LibraryCalls[] = {
+ // Double-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 2
+ { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Double-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 3
+ { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
+ { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
+
+ // Single-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 4
+ { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Single-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 5
+ { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
+ { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
+
+ // Floating-point to integer conversions.
+ // RTABI chapter 4.1.2, Table 6
+ { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Conversions between floating types.
+ // RTABI chapter 4.1.2, Table 7
+ { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Integer to floating-point conversions.
+ // RTABI chapter 4.1.2, Table 8
+ { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Long long helper functions
+ // RTABI chapter 4.2, Table 9
+ { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Integer division functions
+ // RTABI chapter 4.3.1
+ { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ if (LC.Cond != ISD::SETCC_INVALID)
+ setCmpLibcallCC(LC.Op, LC.Cond);
+ }
+
+ // EABI dependent RTLIB
+ if (TM.Options.EABIVersion == EABI::EABI4 ||
+ TM.Options.EABIVersion == EABI::EABI5) {
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char *const Name;
+ const CallingConv::ID CC;
+ const ISD::CondCode Cond;
+ } MemOpsLibraryCalls[] = {
+ // Memory operations
+ // RTABI chapter 4.3.4
+ { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ };
+
+ for (const auto &LC : MemOpsLibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ if (LC.Cond != ISD::SETCC_INVALID)
+ setCmpLibcallCC(LC.Op, LC.Cond);
+ }
+ }
+ }
+
+ if (Subtarget->isTargetWindows()) {
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ } LibraryCalls[] = {
+ { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SDIV_I32, "__rt_sdiv", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UDIV_I32, "__rt_udiv", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SDIV_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UDIV_I64, "__rt_udiv64", CallingConv::ARM_AAPCS_VFP },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ }
+ }
+
+ // Use divmod compiler-rt calls for iOS 5.0 and later.
+ if (Subtarget->isTargetWatchOS() ||
+ (Subtarget->isTargetIOS() &&
+ !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
+ setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
+ setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
+ }
+
+ // The half <-> float conversion functions are always soft-float, but are
+ // needed for some targets which use a hard-float calling convention by
+ // default.
+ if (Subtarget->isAAPCS_ABI()) {
+ setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
+ } else {
+ setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
+ }
+
+ // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
+ // a __gnu_ prefix (which is the default).
+ if (Subtarget->isTargetAEABI()) {
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__aeabi_f2h");
+ setLibcallName(RTLIB::FPROUND_F64_F16, "__aeabi_d2h");
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__aeabi_h2f");
+ }
+
+ if (Subtarget->isThumb1Only())
+ addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
+ else
+ addRegisterClass(MVT::i32, &ARM::GPRRegClass);
+ if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
+ addRegisterClass(MVT::f32, &ARM::SPRRegClass);
+ addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+ }
+
+ for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
+ setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ }
+
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ }
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
+
+ setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
+ setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
+
+ if (Subtarget->hasNEON()) {
+ addDRTypeForNEON(MVT::v2f32);
+ addDRTypeForNEON(MVT::v8i8);
+ addDRTypeForNEON(MVT::v4i16);
+ addDRTypeForNEON(MVT::v2i32);
+ addDRTypeForNEON(MVT::v1i64);
+
+ addQRTypeForNEON(MVT::v4f32);
+ addQRTypeForNEON(MVT::v2f64);
+ addQRTypeForNEON(MVT::v16i8);
+ addQRTypeForNEON(MVT::v8i16);
+ addQRTypeForNEON(MVT::v4i32);
+ addQRTypeForNEON(MVT::v2i64);
+
+ // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
+ // neither Neon nor VFP support any arithmetic operations on it.
+ // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
+ // supported for v4f32.
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ // FIXME: Code duplication: FDIV and FREM are expanded always, see
+ // ARMTargetLowering::addTypeForNEON method for details.
+ setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+ setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ // FIXME: Create unittest.
+ // In another words, find a way when "copysign" appears in DAG with vector
+ // operands.
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ // FIXME: Code duplication: SETCC has custom operation action, see
+ // ARMTargetLowering::addTypeForNEON method for details.
+ setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FNEG and for FABS.
+ setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMA, MVT::v2f64, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
+
+ // Mark v2f32 intrinsics.
+ setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
+
+ // Neon does not support some operations on v1i64 and v2i64 types.
+ setOperationAction(ISD::MUL, MVT::v1i64, Expand);
+ // Custom handling for some quad-vector types to detect VMULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ // Custom handling for some vector types to avoid expensive expansions
+ setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
+ // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
+ // a destination type that is wider than the source, and nor does
+ // it have a FP_TO_[SU]INT instruction with a narrower destination than
+ // source.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+
+ // NEON does not have single instruction CTPOP for vectors with element
+ // types wider than 8-bits. However, custom lowering can leverage the
+ // v8i8/v16i8 vcnt instruction.
+ setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+
+ // NEON does not have single instruction CTTZ for vectors.
+ setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
+
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
+
+ // NEON only has FMA instructions as of VFP4.
+ if (!Subtarget->hasVFP4()) {
+ setOperationAction(ISD::FMA, MVT::v2f32, Expand);
+ setOperationAction(ISD::FMA, MVT::v4f32, Expand);
+ }
+
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::LOAD);
+
+ // It is legal to extload from v4i8 to v4i16 or v4i32.
+ for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
+ MVT::v2i32}) {
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
+ }
+ }
+ }
+
+ // ARM and Thumb2 support UMLAL/SMLAL.
+ if (!Subtarget->isThumb1Only())
+ setTargetDAGCombine(ISD::ADDC);
+
+ if (Subtarget->isFPOnlySP()) {
+ // When targeting a floating-point unit with only single-precision
+ // operations, f64 is legal for the few double-precision instructions which
+ // are present However, no double-precision operations other than moves,
+ // loads and stores are provided by the hardware.
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
+ }
+
+ computeRegisterProperties(Subtarget->getRegisterInfo());
+
+ // ARM does not have floating-point extending loads.
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+ }
+
+ // ... or truncating stores
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+
+ // ARM does not have i1 sign extending load.
+ for (MVT VT : MVT::integer_valuetypes())
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+
+ // ARM supports all 4 flavors of integer indexed load / store.
+ if (!Subtarget->isThumb1Only()) {
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, MVT::i1, Legal);
+ setIndexedLoadAction(im, MVT::i8, Legal);
+ setIndexedLoadAction(im, MVT::i16, Legal);
+ setIndexedLoadAction(im, MVT::i32, Legal);
+ setIndexedStoreAction(im, MVT::i1, Legal);
+ setIndexedStoreAction(im, MVT::i8, Legal);
+ setIndexedStoreAction(im, MVT::i16, Legal);
+ setIndexedStoreAction(im, MVT::i32, Legal);
+ }
+ }
+
+ setOperationAction(ISD::SADDO, MVT::i32, Custom);
+ setOperationAction(ISD::UADDO, MVT::i32, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+ setOperationAction(ISD::USUBO, MVT::i32, Custom);
+
+ // i64 operation support.
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ if (Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ }
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
+ || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL, MVT::i64, Custom);
+ setOperationAction(ISD::SRA, MVT::i64, Custom);
+
+ if (!Subtarget->isThumb1Only()) {
+ // FIXME: We should do this for Thumb1 as well.
+ setOperationAction(ISD::ADDC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBC, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
+ }
+
+ if (!Subtarget->isThumb1Only())
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+
+ // ARM does not have ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ }
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ // These just redirect to CTTZ and CTLZ on ARM.
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+
+ // @llvm.readcyclecounter requires the Performance Monitors extension.
+ // Default to the 0 expansion on unsupported platforms.
+ // FIXME: Technically there are older ARM CPUs that have
+ // implementation-specific ways of obtaining this information.
+ if (Subtarget->hasPerfMon())
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+
+ // Only ARMv6 has BSWAP.
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
+ !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
+ // These are expanded into libcalls if the cpu doesn't have HW divider.
+ setOperationAction(ISD::SDIV, MVT::i32, LibCall);
+ setOperationAction(ISD::UDIV, MVT::i32, LibCall);
+ }
+
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ // Register based DivRem for AEABI (RTABI 4.2)
+ if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) {
+ setOperationAction(ISD::SREM, MVT::i64, Custom);
+ setOperationAction(ISD::UREM, MVT::i64, Custom);
+
+ setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
+ setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
+
+ setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
+
+ setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ } else {
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ }
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
+ // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
+ // the default expansion. If we are targeting a single threaded system,
+ // then set them all for expand so we can lower them later into their
+ // non-atomic form.
+ if (TM.Options.ThreadModel == ThreadModel::Single)
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
+ // ATOMIC_FENCE needs custom lowering; the others should have been expanded
+ // to ldrex/strex loops already.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ // On v8, we have particularly efficient implementations of atomic fences
+ // if they can be combined with nearby atomic loads and stores.
+ if (!Subtarget->hasV8Ops()) {
+ // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
+ setInsertFencesForAtomic(true);
+ }
+ } else {
+ // If there's anything we can use as a barrier, go through custom lowering
+ // for ATOMIC_FENCE.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
+ Subtarget->hasAnyDataBarrier() ? Custom : Expand);
+
+ // Set them all for expansion, which will force libcalls.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
+ // Unordered/Monotonic case.
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
+ }
+
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
+ // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
+ if (!Subtarget->hasV6Ops()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ }
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
+ // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
+ // iff target supports vfp2.
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
+ if (Subtarget->useSjLjEH())
+ setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
+
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ // We don't support sin/cos/fmod/copysign/pow
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ }
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+
+ if (!Subtarget->hasVFP4()) {
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ }
+
+ // Various VFP goodness
+ if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
+ // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
+ if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+ }
+
+ // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
+ if (!Subtarget->hasFP16()) {
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+ }
+ }
+
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->isTargetWatchOS()) {
+ setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
+ setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
+ }
+ if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
+ // For iOS, we don't want to the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
+
+ // FP-ARMv8 implements a lot of rounding-like FP operations.
+ if (Subtarget->hasFPARMv8()) {
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+
+ if (!Subtarget->isFPOnlySP()) {
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+ }
+ }
+
+ if (Subtarget->hasNEON()) {
+ // vmin and vmax aren't available in a scalar form, so we use
+ // a NEON instruction with an undef lane instead.
+ setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::XOR);
+
+ if (Subtarget->hasV6Ops())
+ setTargetDAGCombine(ISD::SRL);
+
+ setStackPointerRegisterToSaveRestore(ARM::SP);
+
+ if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
+ !Subtarget->hasVFP2())
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Hybrid);
+
+ //// temporary - rewrite interface to use type
+ MaxStoresPerMemset = 8;
+ MaxStoresPerMemsetOptSize = 4;
+ MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
+ MaxStoresPerMemcpyOptSize = 2;
+ MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
+ MaxStoresPerMemmoveOptSize = 2;
+
+ // On ARM arguments smaller than 4 bytes are extended, so all arguments
+ // are at least 4 bytes aligned.
+ setMinStackArgumentAlignment(4);
+
+ // Prefer likely predicted branches to selects on out-of-order cores.
+ PredictableSelectIsExpensive = Subtarget->isLikeA9();
+
+ setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
+}
+
+bool ARMTargetLowering::useSoftFloat() const {
+ return Subtarget->useSoftFloat();
+}
+
+// FIXME: It might make sense to define the representative register class as the
+// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
+// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
+// SPR's representative would be DPR_VFP2. This should work well if register
+// pressure tracking were modified such that a register use would increment the
+// pressure of the register class's representative and all of it's super
+// classes' representatives transitively. We have not implemented this because
+// of the difficulty prior to coalescing of modeling operand register classes
+// due to the common occurrence of cross class copies and subregister insertions
+// and extractions.
+std::pair<const TargetRegisterClass *, uint8_t>
+ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
+ const TargetRegisterClass *RRC = nullptr;
+ uint8_t Cost = 1;
+ switch (VT.SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(TRI, VT);
+ // Use DPR as representative register class for all floating point
+ // and vector types. Since there are 32 SPR registers and 32 DPR registers so
+ // the cost is 1 for both f32 and f64.
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
+ RRC = &ARM::DPRRegClass;
+ // When NEON is used for SP, only half of the register file is available
+ // because operations that define both SP and DP results will be constrained
+ // to the VFP2 class (D0-D15). We currently model this constraint prior to
+ // coalescing by double-counting the SP regs. See the FIXME above.
+ if (Subtarget->useNEONForSinglePrecisionFP())
+ Cost = 2;
+ break;
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ RRC = &ARM::DPRRegClass;
+ Cost = 2;
+ break;
+ case MVT::v4i64:
+ RRC = &ARM::DPRRegClass;
+ Cost = 4;
+ break;
+ case MVT::v8i64:
+ RRC = &ARM::DPRRegClass;
+ Cost = 8;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch ((ARMISD::NodeType)Opcode) {
+ case ARMISD::FIRST_NUMBER: break;
+ case ARMISD::Wrapper: return "ARMISD::Wrapper";
+ case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
+ case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
+ case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
+ case ARMISD::CALL: return "ARMISD::CALL";
+ case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
+ case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
+ case ARMISD::tCALL: return "ARMISD::tCALL";
+ case ARMISD::BRCOND: return "ARMISD::BRCOND";
+ case ARMISD::BR_JT: return "ARMISD::BR_JT";
+ case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
+ case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
+ case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::CMP: return "ARMISD::CMP";
+ case ARMISD::CMN: return "ARMISD::CMN";
+ case ARMISD::CMPZ: return "ARMISD::CMPZ";
+ case ARMISD::CMPFP: return "ARMISD::CMPFP";
+ case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
+ case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+
+ case ARMISD::CMOV: return "ARMISD::CMOV";
+
+ case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
+ case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
+ case ARMISD::RRX: return "ARMISD::RRX";
+
+ case ARMISD::ADDC: return "ARMISD::ADDC";
+ case ARMISD::ADDE: return "ARMISD::ADDE";
+ case ARMISD::SUBC: return "ARMISD::SUBC";
+ case ARMISD::SUBE: return "ARMISD::SUBE";
+
+ case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
+ case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
+
+ case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
+ case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
+
+ case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
+
+ case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+
+ case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
+
+ case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
+
+ case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+
+ case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK";
+ case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
+
+ case ARMISD::VCEQ: return "ARMISD::VCEQ";
+ case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
+ case ARMISD::VCGE: return "ARMISD::VCGE";
+ case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
+ case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
+ case ARMISD::VCGEU: return "ARMISD::VCGEU";
+ case ARMISD::VCGT: return "ARMISD::VCGT";
+ case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
+ case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
+ case ARMISD::VCGTU: return "ARMISD::VCGTU";
+ case ARMISD::VTST: return "ARMISD::VTST";
+
+ case ARMISD::VSHL: return "ARMISD::VSHL";
+ case ARMISD::VSHRs: return "ARMISD::VSHRs";
+ case ARMISD::VSHRu: return "ARMISD::VSHRu";
+ case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
+ case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
+ case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
+ case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
+ case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
+ case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
+ case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
+ case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
+ case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
+ case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
+ case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
+ case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
+ case ARMISD::VSLI: return "ARMISD::VSLI";
+ case ARMISD::VSRI: return "ARMISD::VSRI";
+ case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
+ case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
+ case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
+ case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
+ case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
+ case ARMISD::VDUP: return "ARMISD::VDUP";
+ case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
+ case ARMISD::VEXT: return "ARMISD::VEXT";
+ case ARMISD::VREV64: return "ARMISD::VREV64";
+ case ARMISD::VREV32: return "ARMISD::VREV32";
+ case ARMISD::VREV16: return "ARMISD::VREV16";
+ case ARMISD::VZIP: return "ARMISD::VZIP";
+ case ARMISD::VUZP: return "ARMISD::VUZP";
+ case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VTBL1: return "ARMISD::VTBL1";
+ case ARMISD::VTBL2: return "ARMISD::VTBL2";
+ case ARMISD::VMULLs: return "ARMISD::VMULLs";
+ case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::UMLAL: return "ARMISD::UMLAL";
+ case ARMISD::SMLAL: return "ARMISD::SMLAL";
+ case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
+ case ARMISD::BFI: return "ARMISD::BFI";
+ case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
+ case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
+ case ARMISD::VBSL: return "ARMISD::VBSL";
+ case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
+ case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
+ case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
+ case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
+ case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
+ case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
+ case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
+ case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
+ case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
+ case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
+ case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
+ case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
+ case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
+ case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
+ case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
+ case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
+ case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
+ case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
+ case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
+ case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
+ case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
+ }
+ return nullptr;
+}
+
+EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT VT) const {
+ if (!VT.isVector())
+ return getPointerTy(DL);
+ return VT.changeVectorElementTypeToInteger();
+}
+
+/// getRegClassFor - Return the register class that should be used for the
+/// specified value type.
+const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
+ // Map v4i64 to QQ registers but do not make the type legal. Similarly map
+ // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
+ // load / store 4 to 8 consecutive D registers.
+ if (Subtarget->hasNEON()) {
+ if (VT == MVT::v4i64)
+ return &ARM::QQPRRegClass;
+ if (VT == MVT::v8i64)
+ return &ARM::QQQQPRRegClass;
+ }
+ return TargetLowering::getRegClassFor(VT);
+}
+
+// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
+// source/dest is aligned and the copy size is large enough. We therefore want
+// to align such objects passed to memory intrinsics.
+bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const {
+ if (!isa<MemIntrinsic>(CI))
+ return false;
+ MinSize = 8;
+ // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
+ // cycle faster than 4-byte aligned LDM.
+ PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
+ return true;
+}
+
+// Create a fast isel object.
+FastISel *
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const {
+ return ARM::createFastISel(funcInfo, libInfo);
+}
+
+Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
+ unsigned NumVals = N->getNumValues();
+ if (!NumVals)
+ return Sched::RegPressure;
+
+ for (unsigned i = 0; i != NumVals; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (VT.isFloatingPoint() || VT.isVector())
+ return Sched::ILP;
+ }
+
+ if (!N->isMachineOpcode())
+ return Sched::RegPressure;
+
+ // Load are scheduled for latency even if there instruction itinerary
+ // is not available.
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+
+ if (MCID.getNumDefs() == 0)
+ return Sched::RegPressure;
+ if (!Itins->isEmpty() &&
+ Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
+ return Sched::ILP;
+
+ return Sched::RegPressure;
+}
+
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
+static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code!");
+ case ISD::SETNE: return ARMCC::NE;
+ case ISD::SETEQ: return ARMCC::EQ;
+ case ISD::SETGT: return ARMCC::GT;
+ case ISD::SETGE: return ARMCC::GE;
+ case ISD::SETLT: return ARMCC::LT;
+ case ISD::SETLE: return ARMCC::LE;
+ case ISD::SETUGT: return ARMCC::HI;
+ case ISD::SETUGE: return ARMCC::HS;
+ case ISD::SETULT: return ARMCC::LO;
+ case ISD::SETULE: return ARMCC::LS;
+ }
+}
+
+/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
+static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ ARMCC::CondCodes &CondCode2) {
+ CondCode2 = ARMCC::AL;
+ switch (CC) {
+ default: llvm_unreachable("Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = ARMCC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = ARMCC::GE; break;
+ case ISD::SETOLT: CondCode = ARMCC::MI; break;
+ case ISD::SETOLE: CondCode = ARMCC::LS; break;
+ case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETO: CondCode = ARMCC::VC; break;
+ case ISD::SETUO: CondCode = ARMCC::VS; break;
+ case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUGT: CondCode = ARMCC::HI; break;
+ case ISD::SETUGE: CondCode = ARMCC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = ARMCC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = ARMCC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "ARMGenCallingConv.inc"
+
+/// getEffectiveCallingConv - Get the effective calling convention, taking into
+/// account presence of floating point hardware and calling convention
+/// limitations, such as support for variadic functions.
+CallingConv::ID
+ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
+ bool isVarArg) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::ARM_AAPCS:
+ case CallingConv::ARM_APCS:
+ case CallingConv::GHC:
+ return CC;
+ case CallingConv::ARM_AAPCS_VFP:
+ return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
+ case CallingConv::C:
+ if (!Subtarget->isAAPCS_ABI())
+ return CallingConv::ARM_APCS;
+ else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
+ getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
+ !isVarArg)
+ return CallingConv::ARM_AAPCS_VFP;
+ else
+ return CallingConv::ARM_AAPCS;
+ case CallingConv::Fast:
+ if (!Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
+ return CallingConv::Fast;
+ return CallingConv::ARM_APCS;
+ } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
+ return CallingConv::ARM_AAPCS_VFP;
+ else
+ return CallingConv::ARM_AAPCS;
+ }
+}
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
+/// CallingConvention.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) const {
+ switch (getEffectiveCallingConv(CC, isVarArg)) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ case CallingConv::Fast:
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+ case CallingConv::GHC:
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
+ }
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ bool isThisReturn, SDValue ThisVal) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallResult(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ true,
+ isVarArg));
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ // Pass 'this' value directly from the argument to return value, to avoid
+ // reg unit interference
+ if (i == 0 && isThisReturn) {
+ assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
+ "unexpected return calling convention register assignment");
+ InVals.push_back(ThisVal);
+ continue;
+ }
+
+ SDValue Val;
+ if (VA.needsCustom()) {
+ // Handle f64 or half of a v2f64.
+ SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(0, dl, MVT::i32));
+
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(1, dl, MVT::i32));
+ }
+ } else {
+ Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ }
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack.
+SDValue
+ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ SDLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ StackPtr, PtrOff);
+ return DAG.getStore(
+ Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
+ false, false, 0);
+}
+
+void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVectorImpl<SDValue> &MemOpChains,
+ ISD::ArgFlagsTy Flags) const {
+
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Arg);
+ unsigned id = Subtarget->isLittle() ? 0 : 1;
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
+
+ if (NextVA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
+ else {
+ assert(NextVA.isMemLoc());
+ if (!StackPtr.getNode())
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
+ getPointerTy(DAG.getDataLayout()));
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
+ dl, DAG, NextVA,
+ Flags));
+ }
+}
+
+/// LowerCall - Lowering a call into a callseq_start <-
+/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
+/// nodes.
+SDValue
+ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool doesNotRet = CLI.DoesNotReturn;
+ bool isVarArg = CLI.IsVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool isThisReturn = false;
+ bool isSibCall = false;
+ auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
+
+ // Disable tail calls if they're not supported.
+ if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
+ isTailCall = false;
+
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+ if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
+ // We don't support GuaranteedTailCallOpt for ARM, only automatically
+ // detected sibcalls.
+ if (isTailCall) {
+ ++NumTailCalls;
+ isSibCall = true;
+ }
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // For tail calls, memory operands are available in our caller's stack.
+ if (isSibCall)
+ NumBytes = 0;
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ if (!isSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getIntPtrConstant(NumBytes, dl, true), dl);
+
+ SDValue StackPtr =
+ DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
+
+ RegsToPassVector RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization, arguments are handled later.
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ bool isByVal = Flags.isByVal();
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
+ if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, dl, MVT::i32));
+
+ PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isRegLoc()) {
+ PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+ } else {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
+ dl, DAG, VA, Flags));
+ }
+ } else {
+ PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+ StackPtr, MemOpChains, Flags);
+ }
+ } else if (VA.isRegLoc()) {
+ if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
+ assert(VA.getLocVT() == MVT::i32 &&
+ "unexpected calling convention register assignment");
+ assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
+ "unexpected use of 'returned'");
+ isThisReturn = true;
+ }
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else if (isByVal) {
+ assert(VA.isMemLoc());
+ unsigned offset = 0;
+
+ // True if this byval aggregate will be split between registers
+ // and memory.
+ unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
+ unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
+
+ if (CurByValIdx < ByValArgsCount) {
+
+ unsigned RegBegin, RegEnd;
+ CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
+
+ EVT PtrVT =
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ unsigned int i, j;
+ for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
+ SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, false,
+ DAG.InferPtrAlignment(AddArg));
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(j, Load));
+ }
+
+ // If parameter size outsides register area, "offset" value
+ // helps us to calculate stack slot for remained part properly.
+ offset = RegEnd - RegBegin;
+
+ CCInfo.nextInRegsParam();
+ }
+
+ if (Flags.getByValSize() > 4*offset) {
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
+ SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
+ SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
+ MVT::i32);
+ SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
+ MVT::i32);
+
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
+ MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
+ Ops));
+ }
+ } else if (!isSibCall) {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ // Tail call byval lowering might overwrite argument registers so in case of
+ // tail call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag = SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ bool isDirect = false;
+ bool isARMFunc = false;
+ bool isLocalARMFunc = false;
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ auto PtrVt = getPointerTy(DAG.getDataLayout());
+
+ if (Subtarget->genLongCalls()) {
+ assert((Subtarget->isTargetWindows() ||
+ getTargetMachine().getRelocationModel() == Reloc::Static) &&
+ "long-calls with non-static relocation model!");
+ // Handle a global address or an external symbol. If it's not one of
+ // those, the target's already in a register, so we don't need to do
+ // anything extra.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
+
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
+ } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+ ARMPCLabelIndex, 0);
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
+ }
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ isDirect = true;
+ bool isDef = GV->isStrongDefinitionForLinker();
+ bool isStub = (!isDef && Subtarget->isTargetMachO()) &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
+ // ARM call to a local ARM function is predicable.
+ isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
+ // tBX takes a register source operand.
+ if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
+ Callee = DAG.getNode(
+ ARMISD::WrapperPIC, dl, PtrVt,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, true, 0);
+ } else if (Subtarget->isTargetCOFF()) {
+ assert(Subtarget->isTargetWindows() &&
+ "Windows is the only supported COFF target");
+ unsigned TargetFlags = GV->hasDLLImportStorageClass()
+ ? ARMII::MO_DLLIMPORT
+ : ARMII::MO_NO_FLAG;
+ Callee =
+ DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags);
+ if (GV->hasDLLImportStorageClass())
+ Callee =
+ DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
+ DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
+ } else {
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ unsigned OpFlags = 0;
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ isDirect = true;
+ bool isStub = Subtarget->isTargetMachO() &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
+ // tBX takes a register source operand.
+ const char *Sym = S->getSymbol();
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+ ARMPCLabelIndex, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
+ } else {
+ unsigned OpFlags = 0;
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags);
+ }
+ }
+
+ // FIXME: handle tail calls differently.
+ unsigned CallOpc;
+ if (Subtarget->isThumb()) {
+ if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+ } else {
+ if (!isDirect && !Subtarget->hasV5TOps())
+ CallOpc = ARMISD::CALL_NOLINK;
+ else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
+ // Emit regular call when code size is the priority
+ !MF.getFunction()->optForMinSize())
+ // "mov lr, pc; b _foo" to avoid confusing the RSP
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
+ }
+
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ if (!isTailCall) {
+ const uint32_t *Mask;
+ const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
+ if (isThisReturn) {
+ // For 'this' returns, use the R0-preserving mask if applicable
+ Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
+ if (!Mask) {
+ // Set isThisReturn to false if the calling convention is not one that
+ // allows 'returned' to be modeled in this way, so LowerCallResult does
+ // not try to pass 'this' straight through
+ isThisReturn = false;
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
+ }
+ } else
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
+
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (isTailCall) {
+ MF.getFrameInfo()->setHasTailCall();
+ return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
+ }
+
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, isThisReturn,
+ isThisReturn ? OutVals[0] : SDValue());
+}
+
+/// HandleByVal - Every parameter *after* a byval parameter is passed
+/// on the stack. Remember the next parameter register to allocate,
+/// and then confiscate the rest of the parameter registers to insure
+/// this.
+void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
+ unsigned Align) const {
+ assert((State->getCallOrPrologue() == Prologue ||
+ State->getCallOrPrologue() == Call) &&
+ "unhandled ParmContext");
+
+ // Byval (as with any stack) slots are always at least 4 byte aligned.
+ Align = std::max(Align, 4U);
+
+ unsigned Reg = State->AllocateReg(GPRArgRegs);
+ if (!Reg)
+ return;
+
+ unsigned AlignInRegs = Align / 4;
+ unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
+ for (unsigned i = 0; i < Waste; ++i)
+ Reg = State->AllocateReg(GPRArgRegs);
+
+ if (!Reg)
+ return;
+
+ unsigned Excess = 4 * (ARM::R4 - Reg);
+
+ // Special case when NSAA != SP and parameter size greater than size of
+ // all remained GPR regs. In that case we can't split parameter, we must
+ // send it to stack. We also must set NCRN to R4, so waste all
+ // remained registers.
+ const unsigned NSAAOffset = State->getNextStackOffset();
+ if (NSAAOffset != 0 && Size > Excess) {
+ while (State->AllocateReg(GPRArgRegs))
+ ;
+ return;
+ }
+
+ // First register for byval parameter is the first register that wasn't
+ // allocated before this method call, so it would be "reg".
+ // If parameter is small enough to be saved in range [reg, r4), then
+ // the end (first after last) register would be reg + param-size-in-regs,
+ // else parameter would be splitted between registers and stack,
+ // end register would be r4 in this case.
+ unsigned ByValRegBegin = Reg;
+ unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
+ State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+ // Note, first register is allocated in the beginning of function already,
+ // allocate remained amount of registers we need.
+ for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
+ State->AllocateReg(GPRArgRegs);
+ // A byval parameter that is split between registers and memory needs its
+ // size truncated here.
+ // In the case where the entire structure fits in registers, we set the
+ // size in memory to zero.
+ Size = std::max<int>(Size - Excess, 0);
+}
+
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ assert(Subtarget->supportsTailCall());
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Do not sibcall optimize vararg calls unless the call site is not passing
+ // any arguments.
+ if (isVarArg && !Outs.empty())
+ return false;
+
+ // Exception-handling functions need a special set of instructions to indicate
+ // a return to the hardware. Tail-calling another function would probably
+ // break this.
+ if (CallerF->hasFnAttribute("interrupt"))
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // Externally-defined functions with weak linkage should not be
+ // tail-called on ARM when the OS does not support dynamic
+ // pre-emption of symbols, as the AAELF spec requires normal calls
+ // to undefined weak functions to be replaced with a NOP or jump to the
+ // next instruction. The behaviour of branch instructions in this
+ // situation (as used for tail calls) is implementation-defined, so we
+ // cannot rely on the linker replacing the tail call with a return.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ const Triple &TT = getTargetMachine().getTargetTriple();
+ if (GV->hasExternalWeakLinkage() &&
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
+ return false;
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
+ *DAG.getContext(), Call);
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
+ *DAG.getContext(), Call);
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If Caller's vararg or byval argument has been split between registers and
+ // stack, do not perform tail call, since part of the argument is in caller's
+ // local frame.
+ const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
+ getInfo<ARMFunctionInfo>();
+ if (AFI_Caller->getArgRegsSaveSize())
+ return false;
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CalleeCC, false, isVarArg));
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (VA.needsCustom()) {
+ // f64 and vector types are split into multiple registers or
+ // register/stack-slot combinations. The types will not match
+ // the registers; give up on memory f64 refs until we figure
+ // out what to do about this.
+ if (!VA.isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (RegVT == MVT::v2f64) {
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ }
+ } else if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+bool
+ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
+ isVarArg));
+}
+
+static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
+ SDLoc DL, SelectionDAG &DAG) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *F = MF.getFunction();
+
+ StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
+
+ // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
+ // version of the "preferred return address". These offsets affect the return
+ // instruction if this is a return from PL1 without hypervisor extensions.
+ // IRQ/FIQ: +4 "subs pc, lr, #4"
+ // SWI: 0 "subs pc, lr, #0"
+ // ABORT: +4 "subs pc, lr, #4"
+ // UNDEF: +4/+2 "subs pc, lr, #0"
+ // UNDEF varies depending on where the exception came from ARM or Thumb
+ // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
+
+ int64_t LROffset;
+ if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
+ IntKind == "ABORT")
+ LROffset = 4;
+ else if (IntKind == "SWI" || IntKind == "UNDEF")
+ LROffset = 0;
+ else
+ report_fatal_error("Unsupported interrupt attribute. If present, value "
+ "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
+
+ RetOps.insert(RetOps.begin() + 1,
+ DAG.getConstant(LROffset, DL, MVT::i32, false));
+
+ return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
+}
+
+SDValue
+ARMTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext(), Call);
+
+ // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
+ isVarArg));
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ bool isLittleEndian = Subtarget->isLittle();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ AFI->setReturnRegsCount(RVLocs.size());
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Arg = OutVals[realRVLocIdx];
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ // Extract the first half and return it in two registers.
+ SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Half);
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(isLittleEndian ? 0 : 1),
+ Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(isLittleEndian ? 1 : 0),
+ Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ VA = RVLocs[++i]; // skip ahead to next loc
+
+ // Extract the 2nd half and fall through to handle it as an f64 value.
+ Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, dl, MVT::i32));
+ }
+ // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
+ // available.
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Arg);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ fmrrd.getValue(isLittleEndian ? 0 : 1),
+ Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ fmrrd.getValue(isLittleEndian ? 1 : 0),
+ Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+
+ // Guarantee that all emitted copies are
+ // stuck together, avoiding something bad.
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ // Update chain and glue.
+ RetOps[0] = Chain;
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ // CPUs which aren't M-class use a special sequence to return from
+ // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
+ // though we use "subs pc, lr, #N").
+ //
+ // M-class CPUs actually use a normal return sequence with a special
+ // (hardware-provided) value in LR, so the normal code path works.
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
+ !Subtarget->isMClass()) {
+ if (Subtarget->isThumb1Only())
+ report_fatal_error("interrupt attribute is not supported in Thumb1");
+ return LowerInterruptReturn(RetOps, dl, DAG);
+ }
+
+ return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
+}
+
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDValue TCChain = Chain;
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() == ISD::CopyToReg) {
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
+ TCChain = Copy->getOperand(0);
+ } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
+ SDNode *VMov = Copy;
+ // f64 returned in a pair of GPRs.
+ SmallPtrSet<SDNode*, 2> Copies;
+ for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ Copies.insert(*UI);
+ }
+ if (Copies.size() > 2)
+ return false;
+
+ for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
+ UI != UE; ++UI) {
+ SDValue UseChain = UI->getOperand(0);
+ if (Copies.count(UseChain.getNode()))
+ // Second CopyToReg
+ Copy = *UI;
+ else {
+ // We are at the top of this chain.
+ // If the copy has a glue operand, we conservatively assume it
+ // isn't safe to perform a tail call.
+ if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
+ // First CopyToReg
+ TCChain = UseChain;
+ }
+ }
+ } else if (Copy->getOpcode() == ISD::BITCAST) {
+ // f32 returned in a single GPR.
+ if (!Copy->hasOneUse())
+ return false;
+ Copy = *Copy->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
+ return false;
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
+ TCChain = Copy->getOperand(0);
+ } else {
+ return false;
+ }
+
+ bool HasRet = false;
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ARMISD::RET_FLAG &&
+ UI->getOpcode() != ARMISD::INTRET_FLAG)
+ return false;
+ HasRet = true;
+ }
+
+ if (!HasRet)
+ return false;
+
+ Chain = TCChain;
+ return true;
+}
+
+bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!Subtarget->supportsTailCall())
+ return false;
+
+ auto Attr =
+ CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
+ if (!CI->isTailCall() || Attr.getValueAsString() == "true")
+ return false;
+
+ return true;
+}
+
+// Trying to write a 64 bit value so need to split into two 32 bit values first,
+// and pass the lower and high parts through.
+static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ SDValue WriteValue = Op->getOperand(2);
+
+ // This function is only supposed to be called for i64 type argument.
+ assert(WriteValue.getValueType() == MVT::i64
+ && "LowerWRITE_REGISTER called for non-i64 type argument.");
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
+ DAG.getConstant(1, DL, MVT::i32));
+ SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
+ return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
+// one of the above mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing mode. These wrapped nodes will be selected
+// into MOVi.
+static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+ EVT PtrVT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ SDLoc dl(Op);
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry())
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ else
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
+}
+
+unsigned ARMTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
+SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = 0;
+ SDLoc DL(Op);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static) {
+ CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
+ } else {
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
+ ARMCP::CPBlockAddress, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
+ SDValue Result =
+ DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 0);
+ if (RelocM == Reloc::Static)
+ return Result;
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model
+SDValue
+ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ SDLoc dl(GA);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
+ SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
+ Argument =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 0);
+ SDValue Chain = Argument.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
+ Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
+
+ // call __tls_get_addr.
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
+ Args.push_back(Entry);
+
+ // FIXME: is there useful debug info available here?
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
+ 0);
+
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.first;
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" or
+// "local exec" model.
+SDValue
+ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ TLSModel::Model model) const {
+ const GlobalValue *GV = GA->getGlobal();
+ SDLoc dl(GA);
+ SDValue Offset;
+ SDValue Chain = DAG.getEntryNode();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ // Get the Thread Pointer
+ SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+
+ if (model == TLSModel::InitialExec) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ // Initial exec model.
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
+ true);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(
+ PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
+ Chain = Offset.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
+ Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
+
+ Offset = DAG.getLoad(
+ PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
+ } else {
+ // local exec model
+ assert(model == TLSModel::LocalExec);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(
+ PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
+ }
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ // TODO: implement the "local dynamic" model
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
+
+ TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
+
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic:
+ return LowerToTLSGeneralDynamicModel(GA, DAG);
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModels(GA, DAG, model);
+ }
+ llvm_unreachable("bogus TLS model");
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDLoc dl(Op);
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ bool UseGOT_PREL =
+ !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDLoc dl(Op);
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
+ GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
+ UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
+ /*AddCurrentAddress=*/UseGOT_PREL);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
+ SDValue Chain = Result.getValue(1);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ if (UseGOT_PREL)
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
+ return Result;
+ }
+
+ // If we have T2 ops, we can materialize the address directly via movt/movw
+ // pair. This is always cheaper.
+ if (Subtarget->useMovt(DAG.getMachineFunction())) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+ } else {
+ SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ return DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
+ }
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDLoc dl(Op);
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+
+ if (Subtarget->useMovt(DAG.getMachineFunction()))
+ ++NumMovwMovt;
+
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into multiple nodes
+ unsigned Wrapper =
+ RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
+
+ SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
+ SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
+
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
+ return Result;
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
+ assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+ "Windows on ARM expects to use movw/movt");
+
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const ARMII::TOF TargetFlags =
+ (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Result;
+ SDLoc DL(Op);
+
+ ++NumMovwMovt;
+
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
+ TargetFlags));
+ if (GV->hasDLLImportStorageClass())
+ Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
+ return Result;
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue Val = DAG.getConstant(0, dl, MVT::i32);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
+ DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
+ Op.getOperand(1), Val);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
+}
+
+SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
+ Op.getOperand(0));
+}
+
+SDValue
+ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDLoc dl(Op);
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::arm_rbit: {
+ assert(Op.getOperand(1).getValueType() == MVT::i32 &&
+ "RBIT intrinsic must have i32 type!");
+ return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1));
+ }
+ case Intrinsic::arm_thread_pointer: {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+ }
+ case Intrinsic::eh_sjlj_lsda: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ unsigned PCAdj = (RelocM != Reloc::PIC_)
+ ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
+ ARMCP::CPLSDA, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+ return Result;
+ }
+ case Intrinsic::arm_neon_vmulls:
+ case Intrinsic::arm_neon_vmullu: {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
+ ? ARMISD::VMULLs : ARMISD::VMULLu;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ case Intrinsic::arm_neon_vminnm:
+ case Intrinsic::arm_neon_vmaxnm: {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
+ ? ISD::FMINNUM : ISD::FMAXNUM;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ case Intrinsic::arm_neon_vminu:
+ case Intrinsic::arm_neon_vmaxu: {
+ if (Op.getValueType().isFloatingPoint())
+ return SDValue();
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
+ ? ISD::UMIN : ISD::UMAX;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ case Intrinsic::arm_neon_vmins:
+ case Intrinsic::arm_neon_vmaxs: {
+ // v{min,max}s is overloaded between signed integers and floats.
+ if (!Op.getValueType().isFloatingPoint()) {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
+ ? ISD::SMIN : ISD::SMAX;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
+ ? ISD::FMINNAN : ISD::FMAXNAN;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ }
+}
+
+static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ // FIXME: handle "fence singlethread" more efficiently.
+ SDLoc dl(Op);
+ if (!Subtarget->hasDataBarrier()) {
+ // Some ARMv6 cpus can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
+ "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
+ return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, dl, MVT::i32));
+ }
+
+ ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
+ ARM_MB::MemBOpt Domain = ARM_MB::ISH;
+ if (Subtarget->isMClass()) {
+ // Only a full system barrier exists in the M-class architectures.
+ Domain = ARM_MB::SY;
+ } else if (Subtarget->isSwift() && Ord == Release) {
+ // Swift happens to implement ISHST barriers in a way that's compatible with
+ // Release semantics but weaker than ISH so we'd be fools not to use
+ // it. Beware: other processors probably don't!
+ Domain = ARM_MB::ISHST;
+ }
+
+ return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
+ DAG.getConstant(Domain, dl, MVT::i32));
+}
+
+static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ // ARM pre v5TE and Thumb1 does not have preload instructions.
+ if (!(Subtarget->isThumb2() ||
+ (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
+ // Just preserve the chain.
+ return Op.getOperand(0);
+
+ SDLoc dl(Op);
+ unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
+ if (!isRead &&
+ (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
+ // ARMv7 with MP extension has PLDW.
+ return Op.getOperand(0);
+
+ unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ if (Subtarget->isThumb()) {
+ // Invert the bits.
+ isRead = ~isRead & 1;
+ isData = ~isData & 1;
+ }
+
+ return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
+ DAG.getConstant(isData, dl, MVT::i32));
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDLoc dl(Op);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
+SDValue
+ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ SDLoc dl) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ const TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = &ARM::tGPRRegClass;
+ else
+ RC = &ARM::GPRRegClass;
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+
+ SDValue ArgValue2;
+ if (NextVA.isMemLoc()) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
+
+ // Create load node to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+ ArgValue2 = DAG.getLoad(
+ MVT::i32, dl, Root, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
+ false, false, 0);
+ } else {
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
+ ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+ }
+ if (!Subtarget->isLittle())
+ std::swap (ArgValue, ArgValue2);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
+}
+
+// The remaining GPRs hold either the beginning of variable-argument
+// data, or the beginning of an aggregate passed by value (usually
+// byval). Either way, we allocate stack slots adjacent to the data
+// provided by our caller, and store the unallocated registers there.
+// If this is a variadic function, the va_list pointer will begin with
+// these values; otherwise, this reassembles a (byval) structure that
+// was split between registers and memory.
+// Return: The frame index registers were stored into.
+int
+ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+ SDLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
+ int ArgOffset,
+ unsigned ArgSize) const {
+ // Currently, two use-cases possible:
+ // Case #1. Non-var-args function, and we meet first byval parameter.
+ // Setup first unallocated register as first byval register;
+ // eat all remained registers
+ // (these two actions are performed by HandleByVal method).
+ // Then, here, we initialize stack frame with
+ // "store-reg" instructions.
+ // Case #2. Var-args function, that doesn't contain byval parameters.
+ // The same: eat all remained unallocated registers,
+ // initialize stack frame.
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned RBegin, REnd;
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ } else {
+ unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+ RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
+ REnd = ARM::R4;
+ }
+
+ if (REnd != RBegin)
+ ArgOffset = -4 * (ARM::R4 - RBegin);
+
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
+ SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
+
+ SmallVector<SDValue, 4> MemOps;
+ const TargetRegisterClass *RC =
+ AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
+
+ for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
+ unsigned VReg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+ return FrameIndex;
+}
+
+// Setup stack frame, the va_list pointer will start from.
+void
+ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ SDLoc dl, SDValue &Chain,
+ unsigned ArgOffset,
+ unsigned TotalArgRegsSaveSize,
+ bool ForceMutable) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Try to store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by deferencing
+ // the result of va_next.
+ // If there is no regs to be stored, just point address after last
+ // argument passed via stack.
+ int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
+ CCInfo.getInRegsParamsCount(),
+ CCInfo.getNextStackOffset(), 4);
+ AFI->setVarArgsFrameIndex(FrameIndex);
+}
+
+SDValue
+ARMTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), Prologue);
+ CCInfo.AnalyzeFormalArguments(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
+
+ SmallVector<SDValue, 16> ArgValues;
+ SDValue ArgValue;
+ Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
+ unsigned CurArgIdx = 0;
+
+ // Initially ArgRegsSaveSize is zero.
+ // Then we increase this value each time we meet byval parameter.
+ // We also increase this value in case of varargs function.
+ AFI->setArgRegsSaveSize(0);
+
+ // Calculate the amount of stack space that we need to allocate to store
+ // byval and variadic arguments that are passed in registers.
+ // We need to know this before we allocate the first byval or variadic
+ // argument, as they will be allocated a stack slot below the CFA (Canonical
+ // Frame Address, the stack pointer at entry to the function).
+ unsigned ArgRegBegin = ARM::R4;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
+ break;
+
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Index = VA.getValNo();
+ ISD::ArgFlagsTy Flags = Ins[Index].Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ assert(VA.isMemLoc() && "unexpected byval pointer in reg");
+ unsigned RBegin, REnd;
+ CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
+ ArgRegBegin = std::min(ArgRegBegin, RBegin);
+
+ CCInfo.nextInRegsParam();
+ }
+ CCInfo.rewindByValRegsInfo();
+
+ int lastInsIndex = -1;
+ if (isVarArg && MFI->hasVAStart()) {
+ unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+ if (RegIdx != array_lengthof(GPRArgRegs))
+ ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
+ }
+
+ unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
+ AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (Ins[VA.getValNo()].isOrigArg()) {
+ std::advance(CurOrigArg,
+ Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
+ }
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+
+ if (VA.needsCustom()) {
+ // f64 and vector types are split up into multiple registers or
+ // combinations of registers and stack slots.
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ SDValue ArgValue2;
+ if (VA.isMemLoc()) {
+ int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgValue2 = DAG.getLoad(
+ MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, false, 0);
+ } else {
+ ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ }
+ ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue1,
+ DAG.getIntPtrConstant(0, dl));
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue2,
+ DAG.getIntPtrConstant(1, dl));
+ } else
+ ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+
+ } else {
+ const TargetRegisterClass *RC;
+
+ if (RegVT == MVT::f32)
+ RC = &ARM::SPRRegClass;
+ else if (RegVT == MVT::f64)
+ RC = &ARM::DPRRegClass;
+ else if (RegVT == MVT::v2f64)
+ RC = &ARM::QPRRegClass;
+ else if (RegVT == MVT::i32)
+ RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
+ else
+ llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
+
+ // Transform the arguments in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ }
+
+ // If this is an 8 or 16-bit value, it is really passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::ZExt:
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ }
+
+ InVals.push_back(ArgValue);
+
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+ assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
+
+ int index = VA.getValNo();
+
+ // Some Ins[] entries become multiple ArgLoc[] entries.
+ // Process them only once.
+ if (index != lastInsIndex)
+ {
+ ISD::ArgFlagsTy Flags = Ins[index].Flags;
+ // FIXME: For now, all byval parameter objects are marked mutable.
+ // This can be changed with more analysis.
+ // In case of tail call optimization mark all arguments mutable.
+ // Since they could be overwritten by lowering of arguments in case of
+ // a tail call.
+ if (Flags.isByVal()) {
+ assert(Ins[index].isOrigArg() &&
+ "Byval arguments cannot be implicit");
+ unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
+
+ int FrameIndex = StoreByValRegs(
+ CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
+ VA.getLocMemOffset(), Flags.getByValSize());
+ InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
+ CCInfo.nextInRegsParam();
+ } else {
+ unsigned FIOffset = VA.getLocMemOffset();
+ int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ FIOffset, true);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(DAG.getLoad(
+ VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, false, 0));
+ }
+ lastInsIndex = index;
+ }
+ }
+ }
+
+ // varargs
+ if (isVarArg && MFI->hasVAStart())
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
+ CCInfo.getNextStackOffset(),
+ TotalArgRegsSaveSize);
+
+ AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
+
+ return Chain;
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isPosZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+ SDValue WrapperOp = Op.getOperand(1).getOperand(0);
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isPosZero();
+ }
+ } else if (Op->getOpcode() == ISD::BITCAST &&
+ Op->getValueType(0) == MVT::f64) {
+ // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
+ // created by LowerConstantFP().
+ SDValue BitcastOp = Op->getOperand(0);
+ if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
+ isNullConstant(BitcastOp->getOperand(0)))
+ return true;
+ }
+ return false;
+}
+
+/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
+/// the given operands.
+SDValue
+ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG,
+ SDLoc dl) const {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ unsigned C = RHSC->getZExtValue();
+ if (!isLegalICmpImmediate(C)) {
+ // Constant does not fit, try adjusting it by one?
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C - 1, dl, MVT::i32);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (C != 0 && isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C - 1, dl, MVT::i32);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C + 1, dl, MVT::i32);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C + 1, dl, MVT::i32);
+ }
+ break;
+ }
+ }
+ }
+
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMISD::NodeType CompareType;
+ switch (CondCode) {
+ default:
+ CompareType = ARMISD::CMP;
+ break;
+ case ARMCC::EQ:
+ case ARMCC::NE:
+ // Uses only Z Flag
+ CompareType = ARMISD::CMPZ;
+ break;
+ }
+ ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
+ return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
+}
+
+/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
+SDValue
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
+ SDValue Cmp;
+ if (!isFloatingPointZero(RHS))
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
+ else
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
+}
+
+/// duplicateCmp - Glue values can have only one use, so this function
+/// duplicates a comparison node.
+SDValue
+ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
+ unsigned Opc = Cmp.getOpcode();
+ SDLoc DL(Cmp);
+ if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
+ return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+
+ assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
+ Cmp = Cmp.getOperand(0);
+ Opc = Cmp.getOpcode();
+ if (Opc == ARMISD::CMPFP)
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+ else {
+ assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
+}
+
+std::pair<SDValue, SDValue>
+ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
+ SDValue &ARMcc) const {
+ assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
+
+ SDValue Value, OverflowCmp;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDLoc dl(Op);
+
+ // FIXME: We are currently always generating CMPs because we don't support
+ // generating CMN through the backend. This is not as good as the natural
+ // CMP case because it causes a register dependency and cannot be folded
+ // later.
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown overflow instruction!");
+ case ISD::SADDO:
+ ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
+ Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
+ break;
+ case ISD::UADDO:
+ ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
+ Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
+ break;
+ case ISD::SSUBO:
+ ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
+ Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
+ break;
+ case ISD::USUBO:
+ ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
+ Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
+ break;
+ } // switch (...)
+
+ return std::make_pair(Value, OverflowCmp);
+}
+
+
+SDValue
+ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
+ return SDValue();
+
+ SDValue Value, OverflowCmp;
+ SDValue ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDLoc dl(Op);
+ // We use 0 and 1 as false and true values.
+ SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
+ EVT VT = Op.getValueType();
+
+ SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
+ ARMcc, CCR, OverflowCmp);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
+}
+
+
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue SelectTrue = Op.getOperand(1);
+ SDValue SelectFalse = Op.getOperand(2);
+ SDLoc dl(Op);
+ unsigned Opc = Cond.getOpcode();
+
+ if (Cond.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO)) {
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
+ return SDValue();
+
+ SDValue Value, OverflowCmp;
+ SDValue ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ EVT VT = Op.getValueType();
+
+ return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
+ OverflowCmp, DAG);
+ }
+
+ // Convert:
+ //
+ // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+ // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+ //
+ if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+ const ConstantSDNode *CMOVTrue =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+ const ConstantSDNode *CMOVFalse =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+ if (CMOVTrue && CMOVFalse) {
+ unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+ unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+ SDValue True;
+ SDValue False;
+ if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+ True = SelectTrue;
+ False = SelectFalse;
+ } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+ True = SelectFalse;
+ False = SelectTrue;
+ }
+
+ if (True.getNode() && False.getNode()) {
+ EVT VT = Op.getValueType();
+ SDValue ARMcc = Cond.getOperand(2);
+ SDValue CCR = Cond.getOperand(3);
+ SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
+ assert(True.getValueType() == VT);
+ return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
+ }
+ }
+ }
+
+ // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
+ // undefined bits before doing a full-word comparison with zero.
+ Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
+ DAG.getConstant(1, dl, Cond.getValueType()));
+
+ return DAG.getSelectCC(dl, Cond,
+ DAG.getConstant(0, dl, Cond.getValueType()),
+ SelectTrue, SelectFalse, ISD::SETNE);
+}
+
+static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ bool &swpCmpOps, bool &swpVselOps) {
+ // Start by selecting the GE condition code for opcodes that return true for
+ // 'equality'
+ if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
+ CC == ISD::SETULE)
+ CondCode = ARMCC::GE;
+
+ // and GT for opcodes that return false for 'equality'.
+ else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
+ CC == ISD::SETULT)
+ CondCode = ARMCC::GT;
+
+ // Since we are constrained to GE/GT, if the opcode contains 'less', we need
+ // to swap the compare operands.
+ if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
+ CC == ISD::SETULT)
+ swpCmpOps = true;
+
+ // Both GT and GE are ordered comparisons, and return false for 'unordered'.
+ // If we have an unordered opcode, we need to swap the operands to the VSEL
+ // instruction (effectively negating the condition).
+ //
+ // This also has the effect of swapping which one of 'less' or 'greater'
+ // returns true, so we also swap the compare operands. It also switches
+ // whether we return true for 'equality', so we compensate by picking the
+ // opposite condition code to our original choice.
+ if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
+ CC == ISD::SETUGT) {
+ swpCmpOps = !swpCmpOps;
+ swpVselOps = !swpVselOps;
+ CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
+ }
+
+ // 'ordered' is 'anything but unordered', so use the VS condition code and
+ // swap the VSEL operands.
+ if (CC == ISD::SETO) {
+ CondCode = ARMCC::VS;
+ swpVselOps = true;
+ }
+
+ // 'unordered or not equal' is 'anything but equal', so use the EQ condition
+ // code and swap the VSEL operands.
+ if (CC == ISD::SETUNE) {
+ CondCode = ARMCC::EQ;
+ swpVselOps = true;
+ }
+}
+
+SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
+ SDValue TrueVal, SDValue ARMcc, SDValue CCR,
+ SDValue Cmp, SelectionDAG &DAG) const {
+ if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
+ FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
+ TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
+
+ SDValue TrueLow = TrueVal.getValue(0);
+ SDValue TrueHigh = TrueVal.getValue(1);
+ SDValue FalseLow = FalseVal.getValue(0);
+ SDValue FalseHigh = FalseVal.getValue(1);
+
+ SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
+ ARMcc, CCR, Cmp);
+ SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
+ ARMcc, CCR, duplicateCmp(Cmp, DAG));
+
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
+ } else {
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+ Cmp);
+ }
+}
+
+SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ SDLoc dl(Op);
+
+ if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
+ DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
+ dl);
+
+ // If softenSetCCOperands only returned one value, we should compare it to
+ // zero.
+ if (!RHS.getNode()) {
+ RHS = DAG.getConstant(0, dl, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
+ if (LHS.getValueType() == MVT::i32) {
+ // Try to generate VSEL on ARMv8.
+ // The VSEL instruction can't use all the usual ARM condition
+ // codes: it only has two bits to select the condition code, so it's
+ // constrained to use only GE, GT, VS and EQ.
+ //
+ // To implement all the various ISD::SETXXX opcodes, we sometimes need to
+ // swap the operands of the previous compare instruction (effectively
+ // inverting the compare condition, swapping 'less' and 'greater') and
+ // sometimes need to swap the operands to the VSEL (which inverts the
+ // condition in the sense of firing whenever the previous condition didn't)
+ if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
+ CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
+ CC = ISD::getSetCCInverse(CC, true);
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
+ SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ // Try to generate VMAXNM/VMINNM on ARMv8.
+ if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ bool swpCmpOps = false;
+ bool swpVselOps = false;
+ checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
+
+ if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
+ CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
+ if (swpCmpOps)
+ std::swap(LHS, RHS);
+ if (swpVselOps)
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
+ SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
+ if (CondCode2 != ARMCC::AL) {
+ SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
+ // FIXME: Needs another CMP because flag can have but one use.
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
+ }
+ return Result;
+}
+
+/// canChangeToInt - Given the fp compare operand, return true if it is suitable
+/// to morph to an integer compare sequence.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+ const ARMSubtarget *Subtarget) {
+ SDNode *N = Op.getNode();
+ if (!N->hasOneUse())
+ // Otherwise it requires moving the value from fp to integer registers.
+ return false;
+ if (!N->getNumValues())
+ return false;
+ EVT VT = Op.getValueType();
+ if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+ // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+ // vmrs are very slow, e.g. cortex-a8.
+ return false;
+
+ if (isFloatingPointZero(Op)) {
+ SeenZero = true;
+ return true;
+ }
+ return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+ if (isFloatingPointZero(Op))
+ return DAG.getConstant(0, SDLoc(Op), MVT::i32);
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+ return DAG.getLoad(MVT::i32, SDLoc(Op),
+ Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment());
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+ SDValue &RetVal1, SDValue &RetVal2) {
+ SDLoc dl(Op);
+
+ if (isFloatingPointZero(Op)) {
+ RetVal1 = DAG.getConstant(0, dl, MVT::i32);
+ RetVal2 = DAG.getConstant(0, dl, MVT::i32);
+ return;
+ }
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+ SDValue Ptr = Ld->getBasePtr();
+ RetVal1 = DAG.getLoad(MVT::i32, dl,
+ Ld->getChain(), Ptr,
+ Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment());
+
+ EVT PtrType = Ptr.getValueType();
+ unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
+ PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
+ RetVal2 = DAG.getLoad(MVT::i32, dl,
+ Ld->getChain(), NewPtr,
+ Ld->getPointerInfo().getWithOffset(4),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), NewAlign);
+ return;
+ }
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ SDLoc dl(Op);
+
+ bool LHSSeenZero = false;
+ bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
+ bool RHSSeenZero = false;
+ bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
+ if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
+ // If unsafe fp math optimization is enabled and there are no other uses of
+ // the CMP operands, and the condition code is EQ or NE, we can optimize it
+ // to an integer comparison.
+ if (CC == ISD::SETOEQ)
+ CC = ISD::SETEQ;
+ else if (CC == ISD::SETUNE)
+ CC = ISD::SETNE;
+
+ SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
+ SDValue ARMcc;
+ if (LHS.getValueType() == MVT::f32) {
+ LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
+ bitcastf32Toi32(LHS, DAG), Mask);
+ RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
+ bitcastf32Toi32(RHS, DAG), Mask);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ SDValue LHS1, LHS2;
+ SDValue RHS1, RHS2;
+ expandf64Toi32(LHS, DAG, LHS1, LHS2);
+ expandf64Toi32(RHS, DAG, RHS1, RHS2);
+ LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
+ RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
+ }
+
+ return SDValue();
+}
+
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ SDLoc dl(Op);
+
+ if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
+ DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
+ dl);
+
+ // If softenSetCCOperands only returned one value, we should compare it to
+ // zero.
+ if (!RHS.getNode()) {
+ RHS = DAG.getConstant(0, dl, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+ if (getTargetMachine().Options.UnsafeFPMath &&
+ (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+ CC == ISD::SETNE || CC == ISD::SETUNE)) {
+ SDValue Result = OptimizeVFPBrcond(Op, DAG);
+ if (Result.getNode())
+ return Result;
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
+ if (CondCode2 != ARMCC::AL) {
+ ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
+ SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
+ Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
+ }
+ return Res;
+}
+
+SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ SDLoc dl(Op);
+
+ EVT PTy = getPointerTy(DAG.getDataLayout());
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
+ Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
+ Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+ if (Subtarget->isThumb2()) {
+ // Thumb2 uses a two-level jump. That is, it jumps into the jump table
+ // which does another jump to the destination. This also makes it easier
+ // to translate it to TBB / TBH later.
+ // FIXME: This might not work if the function is extremely large.
+ return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
+ Addr, Op.getOperand(2), JTI);
+ }
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ Addr =
+ DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
+ false, false, false, 0);
+ Chain = Addr.getValue(1);
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
+ } else {
+ Addr =
+ DAG.getLoad(PTy, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
+ false, false, false, 0);
+ Chain = Addr.getValue(1);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
+ }
+}
+
+static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+
+ if (Op.getValueType().getVectorElementType() == MVT::i32) {
+ if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
+ return Op;
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
+ "Invalid type for custom lowering!");
+ if (VT != MVT::v4i16)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
+}
+
+SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorFP_TO_INT(Op, DAG);
+ if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::FP_TO_SINT)
+ LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ else
+ LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
+ /*isSigned*/ false, SDLoc(Op)).first;
+ }
+
+ return Op;
+}
+
+static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+
+ if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
+ if (VT.getVectorElementType() == MVT::f32)
+ return Op;
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
+ "Invalid type for custom lowering!");
+ if (VT != MVT::v4f32)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ unsigned CastOpc;
+ unsigned Opc;
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode!");
+ case ISD::SINT_TO_FP:
+ CastOpc = ISD::SIGN_EXTEND;
+ Opc = ISD::SINT_TO_FP;
+ break;
+ case ISD::UINT_TO_FP:
+ CastOpc = ISD::ZERO_EXTEND;
+ Opc = ISD::UINT_TO_FP;
+ break;
+ }
+
+ Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
+SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorINT_TO_FP(Op, DAG);
+ if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::SINT_TO_FP)
+ LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ else
+ LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
+ /*isSigned*/ false, SDLoc(Op)).first;
+ }
+
+ return Op;
+}
+
+SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ // Implement fcopysign with a fabs and a conditional fneg.
+ SDValue Tmp0 = Op.getOperand(0);
+ SDValue Tmp1 = Op.getOperand(1);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Tmp1.getValueType();
+ bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+ Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+ if (UseNEON) {
+ // Use VBSL to copy the sign bit.
+ unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+ SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+ DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
+ EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+ if (VT == MVT::f64)
+ Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+ DAG.getConstant(32, dl, MVT::i32));
+ else /*if (VT == MVT::f32)*/
+ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+ if (SrcVT == MVT::f32) {
+ Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+ if (VT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+ DAG.getConstant(32, dl, MVT::i32));
+ } else if (VT == MVT::f32)
+ Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
+ DAG.getConstant(32, dl, MVT::i32));
+ Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+ SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+ dl, MVT::i32);
+ AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+ SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+ SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+ if (VT == MVT::f32) {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+ DAG.getConstant(0, dl, MVT::i32));
+ } else {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+ }
+
+ return Res;
+ }
+
+ // Bitcast operand 1 to i32.
+ if (SrcVT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ Tmp1).getValue(1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
+
+ // Or in the signbit with integer operations.
+ SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
+ }
+
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ Tmp0);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+}
+
+SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
+SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ const ARMBaseRegisterInfo &ARI =
+ *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = ARI.getFrameRegister(MF);
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ return FrameAddr;
+}
+
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("sp", ARM::SP)
+ .Default(0);
+ if (Reg)
+ return Reg;
+ report_fatal_error(Twine("Invalid register name \""
+ + StringRef(RegName) + "\"."));
+}
+
+// Result is 64 bit value so split into two 32 bit values and return as a
+// pair of values.
+static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ SDLoc DL(N);
+
+ // This function is only supposed to be called for i64 type destination.
+ assert(N->getValueType(0) == MVT::i64
+ && "ExpandREAD_REGISTER called for non-i64 type result.");
+
+ SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
+ DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
+ N->getOperand(0),
+ N->getOperand(1));
+
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
+ Read.getValue(1)));
+ Results.push_back(Read.getOperand(0));
+}
+
+/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
+/// When \p DstVT, the destination type of \p BC, is on the vector
+/// register bank and the source of bitcast, \p Op, operates on the same bank,
+/// it might be possible to combine them, such that everything stays on the
+/// vector register bank.
+/// \p return The node that would replace \p BT, if the combine
+/// is possible.
+static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
+ SelectionDAG &DAG) {
+ SDValue Op = BC->getOperand(0);
+ EVT DstVT = BC->getValueType(0);
+
+ // The only vector instruction that can produce a scalar (remember,
+ // since the bitcast was about to be turned into VMOVDRR, the source
+ // type is i64) from a vector is EXTRACT_VECTOR_ELT.
+ // Moreover, we can do this combine only if there is one use.
+ // Finally, if the destination type is not a vector, there is not
+ // much point on forcing everything on the vector bank.
+ if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !Op.hasOneUse())
+ return SDValue();
+
+ // If the index is not constant, we will introduce an additional
+ // multiply that will stick.
+ // Give up in that case.
+ ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Index)
+ return SDValue();
+ unsigned DstNumElt = DstVT.getVectorNumElements();
+
+ // Compute the new index.
+ const APInt &APIntIndex = Index->getAPIntValue();
+ APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
+ NewIndex *= APIntIndex;
+ // Check if the new constant index fits into i32.
+ if (NewIndex.getBitWidth() > 32)
+ return SDValue();
+
+ // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
+ // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
+ SDLoc dl(Op);
+ SDValue ExtractSrc = Op.getOperand(0);
+ EVT VecVT = EVT::getVectorVT(
+ *DAG.getContext(), DstVT.getScalarType(),
+ ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
+ SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
+ DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
+}
+
+/// ExpandBITCAST - If the target supports VFP, this function is called to
+/// expand a bit convert where either the source or destination type is i64 to
+/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
+/// operand type is illegal (e.g., v2f32 for a target that doesn't support
+/// vectors), since the legalizer won't know what to do with that.
+static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+
+ // This function is only supposed to be called for i64 types, either as the
+ // source or destination of the bit convert.
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
+ "ExpandBITCAST called for non-i64 type");
+
+ // Turn i64->f64 into VMOVDRR.
+ if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
+ // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
+ // if we can combine the bitcast with its source.
+ if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
+ return Val;
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(1, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, DstVT,
+ DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
+ }
+
+ // Turn f64->i64 into VMOVRRD.
+ if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
+ SDValue Cvt;
+ if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
+ SrcVT.getVectorNumElements() > 1)
+ Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32),
+ DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
+ else
+ Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Op);
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
+ }
+
+ return SDValue();
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+/// Zero vectors are used to represent vector negation and in those cases
+/// will be implemented with the NEON VNEG instruction. However, VNEG does
+/// not support i64 elements, so sometimes the zero vectors will need to be
+/// explicitly constructed. Regardless, use a canonical VMOV to create the
+/// zero vector.
+static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+ // The canonical modified immediate encoding of a zero vector is....0!
+ SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
+ EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+}
+
+/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
+/// i32 values and take a 2 x i32 value to shift plus a shift amount.
+SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ SDLoc dl(Op);
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMcc;
+ unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
+
+ assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
+
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, dl, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
+ ISD::SETGE, ARMcc, DAG, dl);
+ SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, dl);
+}
+
+/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
+/// i32 values and take a 2 x i32 value to shift plus a shift amount.
+SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ SDLoc dl(Op);
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMcc;
+
+ assert(Op.getOpcode() == ISD::SHL_PARTS);
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, dl, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
+ ISD::SETGE, ARMcc, DAG, dl);
+ SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ // The rounding mode is in bits 23:22 of the FPSCR.
+ // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+ // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
+ // so that the shift + and get folded into a bitfield extract.
+ SDLoc dl(Op);
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::arm_get_fpscr, dl,
+ MVT::i32));
+ SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+ DAG.getConstant(1U << 22, dl, MVT::i32));
+ SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+ DAG.getConstant(22, dl, MVT::i32));
+ return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, dl, MVT::i32));
+}
+
+static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ if (VT.isVector()) {
+ assert(ST->hasNEON());
+
+ // Compute the least significant set bit: LSB = X & -X
+ SDValue X = N->getOperand(0);
+ SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
+ SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
+
+ EVT ElemTy = VT.getVectorElementType();
+
+ if (ElemTy == MVT::i8) {
+ // Compute with: cttz(x) = ctpop(lsb - 1)
+ SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(1, dl, ElemTy));
+ SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
+ return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
+ }
+
+ if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
+ (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
+ // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
+ unsigned NumBits = ElemTy.getSizeInBits();
+ SDValue WidthMinus1 =
+ DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
+ SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
+ return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
+ }
+
+ // Compute with: cttz(x) = ctpop(lsb - 1)
+
+ // Since we can only compute the number of bits in a byte with vcnt.8, we
+ // have to gather the result with pairwise addition (vpaddl) for i16, i32,
+ // and i64.
+
+ // Compute LSB - 1.
+ SDValue Bits;
+ if (ElemTy == MVT::i64) {
+ // Load constant 0xffff'ffff'ffff'ffff to register.
+ SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(0x1eff, dl, MVT::i32));
+ Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
+ } else {
+ SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(1, dl, ElemTy));
+ Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
+ }
+
+ // Count #bits with vcnt.8.
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
+ SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
+
+ // Gather the #bits with vpaddl (pairwise add.)
+ EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
+ SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
+ DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
+ Cnt8);
+ if (ElemTy == MVT::i16)
+ return Cnt16;
+
+ EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
+ SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
+ DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
+ Cnt16);
+ if (ElemTy == MVT::i32)
+ return Cnt32;
+
+ assert(ElemTy == MVT::i64);
+ SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
+ Cnt32);
+ return Cnt64;
+ }
+
+ if (!ST->hasV6T2Ops())
+ return SDValue();
+
+ SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
+ return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
+}
+
+/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
+/// for each 16-bit element from operand, repeated. The basic idea is to
+/// leverage vcnt to get the 8-bit counts, gather and add the results.
+///
+/// Trace for v4i16:
+/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
+/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
+/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
+/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
+/// [b0 b1 b2 b3 b4 b5 b6 b7]
+/// +[b1 b0 b3 b2 b5 b4 b7 b6]
+/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
+/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
+static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
+ SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
+ SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
+ return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
+}
+
+/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
+/// bit-count for each 16-bit element from the operand. We need slightly
+/// different sequencing for v4i16 and v8i16 to stay within NEON's available
+/// 64/128-bit registers.
+///
+/// Trace for v4i16:
+/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
+/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
+/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
+/// v4i16:Extracted = [k0 k1 k2 k3 ]
+static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
+ if (VT.is64BitVector()) {
+ SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
+ DAG.getIntPtrConstant(0, DL));
+ } else {
+ SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
+ BitCounts, DAG.getIntPtrConstant(0, DL));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
+ }
+}
+
+/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
+/// bit-count for each 32-bit element from the operand. The idea here is
+/// to split the vector into 16-bit elements, leverage the 16-bit count
+/// routine, and then combine the results.
+///
+/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
+/// input = [v0 v1 ] (vi: 32-bit elements)
+/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
+/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
+/// vrev: N0 = [k1 k0 k3 k2 ]
+/// [k0 k1 k2 k3 ]
+/// N1 =+[k1 k0 k3 k2 ]
+/// [k0 k2 k1 k3 ]
+/// N2 =+[k1 k3 k0 k2 ]
+/// [k0 k2 k1 k3 ]
+/// Extended =+[k1 k3 k0 k2 ]
+/// [k0 k2 ]
+/// Extracted=+[k1 k3 ]
+///
+static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
+
+ SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
+ SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
+ SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
+ SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
+ SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
+
+ if (VT.is64BitVector()) {
+ SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
+ DAG.getIntPtrConstant(0, DL));
+ } else {
+ SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
+ }
+}
+
+static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+
+ assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
+ assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
+ VT == MVT::v4i16 || VT == MVT::v8i16) &&
+ "Unexpected type for custom ctpop lowering");
+
+ if (VT.getVectorElementType() == MVT::i32)
+ return lowerCTPOP32BitElements(N, DAG);
+ else
+ return lowerCTPOP16BitElements(N, DAG);
+}
+
+static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ if (!VT.isVector())
+ return SDValue();
+
+ // Lower vector shifts on NEON to use VSHL.
+ assert(ST->hasNEON() && "unexpected vector shift");
+
+ // Left shifts translate directly to the vshiftu intrinsic.
+ if (N->getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
+ MVT::i32),
+ N->getOperand(0), N->getOperand(1));
+
+ assert((N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+
+ // NEON uses the same intrinsics for both left and right shifts. For
+ // right shifts, the shift amounts are negative, so negate the vector of
+ // shift amounts.
+ EVT ShiftVT = N->getOperand(1).getValueType();
+ SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
+ getZeroVector(ShiftVT, DAG, dl),
+ N->getOperand(1));
+ Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
+ Intrinsic::arm_neon_vshifts :
+ Intrinsic::arm_neon_vshiftu);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(vshiftInt, dl, MVT::i32),
+ N->getOperand(0), NegatedCount);
+}
+
+static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ // We can get here for a node like i32 = ISD::SHL i32, i64
+ if (VT != MVT::i64)
+ return SDValue();
+
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ "Unknown shift to lower!");
+
+ // We only lower SRA, SRL of 1 here, all others use generic lowering.
+ if (!isOneConstant(N->getOperand(1)))
+ return SDValue();
+
+ // If we are in thumb mode, we don't have RRX.
+ if (ST->isThumb1Only()) return SDValue();
+
+ // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(1, dl, MVT::i32));
+
+ // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+ // captures the result into a carry flag.
+ unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
+
+ // The low part is an ARMISD::RRX operand, which shifts the carry in.
+ Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+ SDValue TmpOp0, TmpOp1;
+ bool Invert = false;
+ bool Swap = false;
+ unsigned Opc = 0;
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
+ EVT VT = Op.getValueType();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ SDLoc dl(Op);
+
+ if (CmpVT.getVectorElementType() == MVT::i64)
+ // 64-bit comparisons are not legal. We've marked SETCC as non-Custom,
+ // but it's possible that our operands are 64-bit but our result is 32-bit.
+ // Bail in this case.
+ return SDValue();
+
+ if (Op1.getValueType().isFloatingPoint()) {
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Illegal FP comparison");
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; // Fallthrough
+ case ISD::SETOEQ:
+ case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETOLT:
+ case ISD::SETLT: Swap = true; // Fallthrough
+ case ISD::SETOGT:
+ case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETOLE:
+ case ISD::SETLE: Swap = true; // Fallthrough
+ case ISD::SETOGE:
+ case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETUGE: Swap = true; // Fallthrough
+ case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
+ case ISD::SETUGT: Swap = true; // Fallthrough
+ case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
+ case ISD::SETUEQ: Invert = true; // Fallthrough
+ case ISD::SETONE:
+ // Expand this to (OLT | OGT).
+ TmpOp0 = Op0;
+ TmpOp1 = Op1;
+ Opc = ISD::OR;
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
+ break;
+ case ISD::SETUO: Invert = true; // Fallthrough
+ case ISD::SETO:
+ // Expand this to (OLT | OGE).
+ TmpOp0 = Op0;
+ TmpOp1 = Op1;
+ Opc = ISD::OR;
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
+ break;
+ }
+ } else {
+ // Integer comparisons.
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Illegal integer comparison");
+ case ISD::SETNE: Invert = true;
+ case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETLT: Swap = true;
+ case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETLE: Swap = true;
+ case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
+ case ISD::SETULE: Swap = true;
+ case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
+ }
+
+ // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
+ if (Opc == ARMISD::VCEQ) {
+
+ SDValue AndOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ AndOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
+ AndOp = Op1;
+
+ // Ignore bitconvert.
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
+ AndOp = AndOp.getOperand(0);
+
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
+ Opc = ARMISD::VTST;
+ Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
+ Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
+ Invert = !Invert;
+ }
+ }
+ }
+
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // If one of the operands is a constant vector zero, attempt to fold the
+ // comparison to a specialized compare-against-zero form.
+ SDValue SingleOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ SingleOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
+ if (Opc == ARMISD::VCGE)
+ Opc = ARMISD::VCLEZ;
+ else if (Opc == ARMISD::VCGT)
+ Opc = ARMISD::VCLTZ;
+ SingleOp = Op1;
+ }
+
+ SDValue Result;
+ if (SingleOp.getNode()) {
+ switch (Opc) {
+ case ARMISD::VCEQ:
+ Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
+ case ARMISD::VCGE:
+ Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
+ case ARMISD::VCLEZ:
+ Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
+ case ARMISD::VCGT:
+ Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
+ case ARMISD::VCLTZ:
+ Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
+ default:
+ Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
+ }
+ } else {
+ Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
+ }
+
+ Result = DAG.getSExtOrTrunc(Result, dl, VT);
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+
+ return Result;
+}
+
+/// isNEONModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON instruction with a "modified immediate"
+/// operand (e.g., VMOV). If so, return the encoded value.
+static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+ unsigned SplatBitSize, SelectionDAG &DAG,
+ SDLoc dl, EVT &VT, bool is128Bits,
+ NEONModImmType type) {
+ unsigned OpCmode, Imm;
+
+ // SplatBitSize is set to the smallest size that splats the vector, so a
+ // zero vector will always have SplatBitSize == 8. However, NEON modified
+ // immediate instructions others than VMOV do not support the 8-bit encoding
+ // of a zero vector, and the default encoding of zero is supposed to be the
+ // 32-bit version.
+ if (SplatBits == 0)
+ SplatBitSize = 32;
+
+ switch (SplatBitSize) {
+ case 8:
+ if (type != VMOVModImm)
+ return SDValue();
+ // Any 1-byte value is OK. Op=0, Cmode=1110.
+ assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
+ OpCmode = 0xe;
+ Imm = SplatBits;
+ VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
+ break;
+
+ case 16:
+ // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
+ VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x00nn: Op=x, Cmode=100x.
+ OpCmode = 0x8;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0xnn00: Op=x, Cmode=101x.
+ OpCmode = 0xa;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ return SDValue();
+
+ case 32:
+ // NEON's 32-bit VMOV supports splat values where:
+ // * only one byte is nonzero, or
+ // * the least significant byte is 0xff and the second byte is nonzero, or
+ // * the least significant 2 bytes are 0xff and the third is nonzero.
+ VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x000000nn: Op=x, Cmode=000x.
+ OpCmode = 0;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0x0000nn00: Op=x, Cmode=001x.
+ OpCmode = 0x2;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ if ((SplatBits & ~0xff0000) == 0) {
+ // Value = 0x00nn0000: Op=x, Cmode=010x.
+ OpCmode = 0x4;
+ Imm = SplatBits >> 16;
+ break;
+ }
+ if ((SplatBits & ~0xff000000) == 0) {
+ // Value = 0xnn000000: Op=x, Cmode=011x.
+ OpCmode = 0x6;
+ Imm = SplatBits >> 24;
+ break;
+ }
+
+ // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
+ if (type == OtherModImm) return SDValue();
+
+ if ((SplatBits & ~0xffff) == 0 &&
+ ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+ // Value = 0x0000nnff: Op=x, Cmode=1100.
+ OpCmode = 0xc;
+ Imm = SplatBits >> 8;
+ break;
+ }
+
+ if ((SplatBits & ~0xffffff) == 0 &&
+ ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+ // Value = 0x00nnffff: Op=x, Cmode=1101.
+ OpCmode = 0xd;
+ Imm = SplatBits >> 16;
+ break;
+ }
+
+ // Note: there are a few 32-bit splat values (specifically: 00ffff00,
+ // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
+ // VMOV.I32. A (very) minor optimization would be to replicate the value
+ // and fall through here to test for a valid 64-bit splat. But, then the
+ // caller would also need to check and handle the change in size.
+ return SDValue();
+
+ case 64: {
+ if (type != VMOVModImm)
+ return SDValue();
+ // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
+ uint64_t BitMask = 0xff;
+ uint64_t Val = 0;
+ unsigned ImmMask = 1;
+ Imm = 0;
+ for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
+ Val |= BitMask;
+ Imm |= ImmMask;
+ } else if ((SplatBits & BitMask) != 0) {
+ return SDValue();
+ }
+ BitMask <<= 8;
+ ImmMask <<= 1;
+ }
+
+ if (DAG.getDataLayout().isBigEndian())
+ // swap higher and lower 32 bit word
+ Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
+
+ // Op=1, Cmode=1110.
+ OpCmode = 0x1e;
+ VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
+ break;
+ }
+
+ default:
+ llvm_unreachable("unexpected size for isNEONModifiedImm");
+ }
+
+ unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+ return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
+}
+
+SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ if (!ST->hasVFP3())
+ return SDValue();
+
+ bool IsDouble = Op.getValueType() == MVT::f64;
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+
+ // Use the default (constant pool) lowering for double constants when we have
+ // an SP-only FPU
+ if (IsDouble && Subtarget->isFPOnlySP())
+ return SDValue();
+
+ // Try splatting with a VMOV.f32...
+ APFloat FPVal = CFP->getValueAPF();
+ int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
+
+ if (ImmVal != -1) {
+ if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
+ // We have code in place to select a valid ConstantFP already, no need to
+ // do any mangling.
+ return Op;
+ }
+
+ // It's a float and we are trying to use NEON operations where
+ // possible. Lower it to a splat followed by an extract.
+ SDLoc DL(Op);
+ SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
+ NewVal);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
+ DAG.getConstant(0, DL, MVT::i32));
+ }
+
+ // The rest of our options are NEON only, make sure that's allowed before
+ // proceeding..
+ if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
+ return SDValue();
+
+ EVT VMovVT;
+ uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
+
+ // It wouldn't really be worth bothering for doubles except for one very
+ // important value, which does happen to match: 0.0. So make sure we don't do
+ // anything stupid.
+ if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
+ return SDValue();
+
+ // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
+ SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
+ VMovVT, false, VMOVModImm);
+ if (NewVal != SDValue()) {
+ SDLoc DL(Op);
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
+ NewVal);
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
+ SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
+ VecConstant);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
+ DAG.getConstant(0, DL, MVT::i32));
+ }
+
+ // Finally, try a VMVN.i32
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
+ false, VMVNModImm);
+ if (NewVal != SDValue()) {
+ SDLoc DL(Op);
+ SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
+ SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
+ VecConstant);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
+ DAG.getConstant(0, DL, MVT::i32));
+ }
+
+ return SDValue();
+}
+
+// check if an VEXT instruction can handle the shuffle mask when the
+// vector sources of the shuffle are the same.
+static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
+ Imm = M[0];
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
+ unsigned ExpectedElt = Imm;
+ for (unsigned i = 1; i < NumElts; ++i) {
+ // Increment the expected index. If it wraps around, just follow it
+ // back to index zero and keep going.
+ ++ExpectedElt;
+ if (ExpectedElt == NumElts)
+ ExpectedElt = 0;
+
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if (ExpectedElt != static_cast<unsigned>(M[i]))
+ return false;
+ }
+
+ return true;
+}
+
+
+static bool isVEXTMask(ArrayRef<int> M, EVT VT,
+ bool &ReverseVEXT, unsigned &Imm) {
+ unsigned NumElts = VT.getVectorNumElements();
+ ReverseVEXT = false;
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
+ Imm = M[0];
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
+ unsigned ExpectedElt = Imm;
+ for (unsigned i = 1; i < NumElts; ++i) {
+ // Increment the expected index. If it wraps around, it may still be
+ // a VEXT but the source vectors must be swapped.
+ ExpectedElt += 1;
+ if (ExpectedElt == NumElts * 2) {
+ ExpectedElt = 0;
+ ReverseVEXT = true;
+ }
+
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if (ExpectedElt != static_cast<unsigned>(M[i]))
+ return false;
+ }
+
+ // Adjust the index value if the source operands will be swapped.
+ if (ReverseVEXT)
+ Imm -= NumElts;
+
+ return true;
+}
+
+/// isVREVMask - Check if a vector shuffle corresponds to a VREV
+/// instruction with the specified blocksize. (The order of the elements
+/// within each block of the vector is reversed.)
+static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
+ assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
+ "Only possible block sizes for VREV are: 16, 32, 64");
+
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
+
+ if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+ return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ return false;
+ }
+
+ return true;
+}
+
+static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
+ // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
+ // range, then 0 is placed into the resulting vector. So pretty much any mask
+ // of 8 elements can work here.
+ return VT == MVT::v8i8 && M.size() == 8;
+}
+
+// Checks whether the shuffle mask represents a vector transpose (VTRN) by
+// checking that pairs of elements in the shuffle mask represent the same index
+// in each vector, incrementing the expected index by 2 at each step.
+// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
+// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
+// v2={e,f,g,h}
+// WhichResult gives the offset for each element in the mask based on which
+// of the two results it belongs to.
+//
+// The transpose can be represented either as:
+// result1 = shufflevector v1, v2, result1_shuffle_mask
+// result2 = shufflevector v1, v2, result2_shuffle_mask
+// where v1/v2 and the shuffle masks have the same number of elements
+// (here WhichResult (see below) indicates which result is being checked)
+//
+// or as:
+// results = shufflevector v1, v2, shuffle_mask
+// where both results are returned in one vector and the shuffle mask has twice
+// as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
+// want to check the low half and high half of the shuffle mask as if it were
+// the other case
+static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ // If the mask is twice as long as the input vector then we need to check the
+ // upper and lower parts of the mask with a matching value for WhichResult
+ // FIXME: A mask with only even values will be rejected in case the first
+ // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
+ // M[0] is used to determine WhichResult
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
+ return false;
+ }
+ }
+
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
+ return true;
+}
+
+/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
+ return false;
+ }
+ }
+
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
+ return true;
+}
+
+// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
+// that the mask elements are either all even and in steps of size 2 or all odd
+// and in steps of size 2.
+// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
+// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
+// v2={e,f,g,h}
+// Requires similar checks to that of isVTRNMask with
+// respect the how results are returned.
+static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; ++j) {
+ if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
+ return false;
+ }
+ }
+
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
+static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ unsigned Half = NumElts / 2;
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; j += Half) {
+ unsigned Idx = WhichResult;
+ for (unsigned k = 0; k < Half; ++k) {
+ int MIdx = M[i + j + k];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
+ return false;
+ Idx += 2;
+ }
+ }
+ }
+
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
+// that pairs of elements of the shufflemask represent the same index in each
+// vector incrementing sequentially through the vectors.
+// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
+// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
+// v2={e,f,g,h}
+// Requires similar checks to that of isVTRNMask with respect the how results
+// are returned.
+static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
+ }
+
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
+ return false;
+ Idx += 1;
+ }
+ }
+
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
+/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
+static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
+ unsigned &WhichResult,
+ bool &isV_UNDEF) {
+ isV_UNDEF = false;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return ARMISD::VTRN;
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return ARMISD::VUZP;
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return ARMISD::VZIP;
+
+ isV_UNDEF = true;
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return ARMISD::VTRN;
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return ARMISD::VUZP;
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return ARMISD::VZIP;
+
+ return 0;
+}
+
+/// \return true if this is a reverse operation on an vector.
+static bool isReverseMask(ArrayRef<int> M, EVT VT) {
+ unsigned NumElts = VT.getVectorNumElements();
+ // Make sure the mask has the right size.
+ if (NumElts != M.size())
+ return false;
+
+ // Look for <15, ..., 3, -1, 1, 0>.
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
+ return false;
+
+ return true;
+}
+
+// If N is an integer constant that can be moved into a register in one
+// instruction, return an SDValue of such a constant (will become a MOV
+// instruction). Otherwise return null.
+static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
+ const ARMSubtarget *ST, SDLoc dl) {
+ uint64_t Val;
+ if (!isa<ConstantSDNode>(N))
+ return SDValue();
+ Val = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (ST->isThumb1Only()) {
+ if (Val <= 255 || ~Val <= 255)
+ return DAG.getConstant(Val, dl, MVT::i32);
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
+ return DAG.getConstant(Val, dl, MVT::i32);
+ }
+ return SDValue();
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it.
+SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ // Check if an immediate VMOV works.
+ EVT VmovVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, dl, VmovVT, VT.is128BitVector(),
+ VMOVModImm);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ }
+
+ // Try an immediate VMVN.
+ uint64_t NegatedImm = (~SplatBits).getZExtValue();
+ Val = isNEONModifiedImm(NegatedImm,
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, dl, VmovVT, VT.is128BitVector(),
+ VMVNModImm);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ }
+
+ // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
+ if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
+ int ImmVal = ARM_AM::getFP32Imm(SplatBits);
+ if (ImmVal != -1) {
+ SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
+ return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
+ }
+ }
+ }
+ }
+
+ // Scan through the operands to see if only one value is used.
+ //
+ // As an optimisation, even if more than one value is used it may be more
+ // profitable to splat with one value then change some lanes.
+ //
+ // Heuristically we decide to do this if the vector has a "dominant" value,
+ // defined as splatted to more than half of the lanes.
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool hasDominantValue = false;
+ bool isConstant = true;
+
+ // Map of the number of times a particular SDValue appears in the
+ // element list.
+ DenseMap<SDValue, unsigned> ValueCounts;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ ValueCounts.insert(std::make_pair(V, 0));
+ unsigned &Count = ValueCounts[V];
+
+ // Is this value dominant? (takes up more than half of the lanes)
+ if (++Count > (NumElts / 2)) {
+ hasDominantValue = true;
+ Value = V;
+ }
+ }
+ if (ValueCounts.size() != 1)
+ usesOnlyOneValue = false;
+ if (!Value.getNode() && ValueCounts.size() > 0)
+ Value = ValueCounts.begin()->first;
+
+ if (ValueCounts.size() == 0)
+ return DAG.getUNDEF(VT);
+
+ // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
+ // Keep going if we are hitting this case.
+ if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (hasDominantValue && EltSize <= 32) {
+ if (!isConstant) {
+ SDValue N;
+
+ // If we are VDUPing a value that comes directly from a vector, that will
+ // cause an unnecessary move to and from a GPR, where instead we could
+ // just use VDUPLANE. We can only do this if the lane being extracted
+ // is at a constant index, as the VDUP from lane instructions only have
+ // constant-index forms.
+ ConstantSDNode *constIndex;
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
+ // We need to create a new undef vector to use for the VDUPLANE if the
+ // size of the vector from which we get the value is different than the
+ // size of the vector that we need to create. We will insert the element
+ // such that the register coalescer will remove unnecessary copies.
+ if (VT != Value->getOperand(0).getValueType()) {
+ unsigned index = constIndex->getAPIntValue().getLimitedValue() %
+ VT.getVectorNumElements();
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
+ Value, DAG.getConstant(index, dl, MVT::i32)),
+ DAG.getConstant(index, dl, MVT::i32));
+ } else
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ Value->getOperand(0), Value->getOperand(1));
+ } else
+ N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+
+ if (!usesOnlyOneValue) {
+ // The dominant value was splatted as 'N', but we now have to insert
+ // all differing elements.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ if (Op.getOperand(I) == Value)
+ continue;
+ SmallVector<SDValue, 3> Ops;
+ Ops.push_back(N);
+ Ops.push_back(Op.getOperand(I));
+ Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
+ N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
+ }
+ }
+ return N;
+ }
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ Op.getOperand(i)));
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
+ Val = LowerBUILD_VECTOR(Val, DAG, ST);
+ if (Val.getNode())
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+ if (usesOnlyOneValue) {
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (isConstant && Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
+ }
+
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
+ if (isConstant)
+ return SDValue();
+
+ // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
+ if (NumElts >= 4) {
+ SDValue shuffle = ReconstructShuffle(Op, DAG);
+ if (shuffle != SDValue())
+ return shuffle;
+ }
+
+ // Vectors with 32- or 64-bit elements can be built by directly assigning
+ // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
+ // will be legalized.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+
+ // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
+ // know the default expansion would otherwise fall back on something even
+ // worse. For a vector with one or two non-undef values, that's
+ // scalar_to_vector for the elements followed by a shuffle (provided the
+ // shuffle is valid for the target) and materialization element by element
+ // on the stack followed by a load for everything else.
+ if (!isConstant && !usesOnlyOneValue) {
+ SDValue Vec = DAG.getUNDEF(VT);
+ for (unsigned i = 0 ; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
+ }
+ return Vec;
+ }
+
+ return SDValue();
+}
+
+// Gather data to see if the operation can be modelled as a
+// shuffle in combination with VEXTs.
+SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ struct ShuffleSourceInfo {
+ SDValue Vec;
+ unsigned MinElt;
+ unsigned MaxElt;
+
+ // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
+ // be compatible with the shuffle we intend to construct. As a result
+ // ShuffleVec will be some sliding window into the original Vec.
+ SDValue ShuffleVec;
+
+ // Code should guarantee that element i in Vec starts at element "WindowBase
+ // + i * WindowScale in ShuffleVec".
+ int WindowBase;
+ int WindowScale;
+
+ bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
+ ShuffleSourceInfo(SDValue Vec)
+ : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
+ WindowScale(1) {}
+ };
+
+ // First gather all vectors used as an immediate source for this BUILD_VECTOR
+ // node.
+ SmallVector<ShuffleSourceInfo, 2> Sources;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
+ // A shuffle can only come from building a vector from various
+ // elements of other vectors.
+ return SDValue();
+ } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
+ // Furthermore, shuffles require a constant mask, whereas extractelts
+ // accept variable indices.
+ return SDValue();
+ }
+
+ // Add this element source to the list if it's not already there.
+ SDValue SourceVec = V.getOperand(0);
+ auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
+ if (Source == Sources.end())
+ Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
+
+ // Update the minimum and maximum lane number seen.
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ Source->MinElt = std::min(Source->MinElt, EltNo);
+ Source->MaxElt = std::max(Source->MaxElt, EltNo);
+ }
+
+ // Currently only do something sane when at most two source vectors
+ // are involved.
+ if (Sources.size() > 2)
+ return SDValue();
+
+ // Find out the smallest element size among result and two sources, and use
+ // it as element size to build the shuffle_vector.
+ EVT SmallestEltTy = VT.getVectorElementType();
+ for (auto &Source : Sources) {
+ EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
+ if (SrcEltTy.bitsLT(SmallestEltTy))
+ SmallestEltTy = SrcEltTy;
+ }
+ unsigned ResMultiplier =
+ VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
+ NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
+
+ // If the source vector is too wide or too narrow, we may nevertheless be able
+ // to construct a compatible shuffle either by concatenating it with UNDEF or
+ // extracting a suitable range of elements.
+ for (auto &Src : Sources) {
+ EVT SrcVT = Src.ShuffleVec.getValueType();
+
+ if (SrcVT.getSizeInBits() == VT.getSizeInBits())
+ continue;
+
+ // This stage of the search produces a source with the same element type as
+ // the original, but with a total width matching the BUILD_VECTOR output.
+ EVT EltVT = SrcVT.getVectorElementType();
+ unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
+ EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
+
+ if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
+ if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+ // We can pad out the smaller vector for free, so if it's part of a
+ // shuffle...
+ Src.ShuffleVec =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
+ DAG.getUNDEF(Src.ShuffleVec.getValueType()));
+ continue;
+ }
+
+ if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
+ return SDValue();
+
+ if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
+ // Span too large for a VEXT to cope
+ return SDValue();
+ }
+
+ if (Src.MinElt >= NumSrcElts) {
+ // The extraction can just take the second half
+ Src.ShuffleVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(NumSrcElts, dl, MVT::i32));
+ Src.WindowBase = -NumSrcElts;
+ } else if (Src.MaxElt < NumSrcElts) {
+ // The extraction can just take the first half
+ Src.ShuffleVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(0, dl, MVT::i32));
+ } else {
+ // An actual VEXT is needed
+ SDValue VEXTSrc1 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue VEXTSrc2 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(NumSrcElts, dl, MVT::i32));
+
+ Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
+ VEXTSrc2,
+ DAG.getConstant(Src.MinElt, dl, MVT::i32));
+ Src.WindowBase = -Src.MinElt;
+ }
+ }
+
+ // Another possible incompatibility occurs from the vector element types. We
+ // can fix this by bitcasting the source vectors to the same type we intend
+ // for the shuffle.
+ for (auto &Src : Sources) {
+ EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
+ if (SrcEltTy == SmallestEltTy)
+ continue;
+ assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
+ Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
+ Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ Src.WindowBase *= Src.WindowScale;
+ }
+
+ // Final sanity check before we try to actually produce a shuffle.
+ DEBUG(
+ for (auto Src : Sources)
+ assert(Src.ShuffleVec.getValueType() == ShuffleVT);
+ );
+
+ // The stars all align, our next step is to produce the mask for the shuffle.
+ SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
+ int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ SDValue Entry = Op.getOperand(i);
+ if (Entry.getOpcode() == ISD::UNDEF)
+ continue;
+
+ auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
+ int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
+
+ // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
+ // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
+ // segment.
+ EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
+ int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
+ VT.getVectorElementType().getSizeInBits());
+ int LanesDefined = BitsDefined / BitsPerShuffleLane;
+
+ // This source is expected to fill ResMultiplier lanes of the final shuffle,
+ // starting at the appropriate offset.
+ int *LaneMask = &Mask[i * ResMultiplier];
+
+ int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
+ ExtractBase += NumElts * (Src - Sources.begin());
+ for (int j = 0; j < LanesDefined; ++j)
+ LaneMask[j] = ExtractBase + j;
+ }
+
+ // Final check before we try to produce nonsense...
+ if (!isShuffleMaskLegal(Mask, ShuffleVT))
+ return SDValue();
+
+ // We can't handle more than two sources. This should have already
+ // been checked before this point.
+ assert(Sources.size() <= 2 && "Too many sources!");
+
+ SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
+ for (unsigned i = 0; i < Sources.size(); ++i)
+ ShuffleOps[i] = Sources[i].ShuffleVec;
+
+ SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
+ ShuffleOps[1], &Mask[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
+ if (VT.getVectorNumElements() == 4 &&
+ (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (M[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = M[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return true;
+ }
+
+ bool ReverseVEXT, isV_UNDEF;
+ unsigned Imm, WhichResult;
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ return (EltSize >= 32 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isVREVMask(M, VT, 64) ||
+ isVREVMask(M, VT, 32) ||
+ isVREVMask(M, VT, 16) ||
+ isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+ isVTBLMask(M, VT) ||
+ isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
+ ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ SDLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VREV,
+ OP_VDUP0,
+ OP_VDUP1,
+ OP_VDUP2,
+ OP_VDUP3,
+ OP_VEXT1,
+ OP_VEXT2,
+ OP_VEXT3,
+ OP_VUZPL, // VUZP, left result
+ OP_VUZPR, // VUZP, right result
+ OP_VZIPL, // VZIP, left result
+ OP_VZIPR, // VZIP, right result
+ OP_VTRNL, // VTRN, left result
+ OP_VTRNR // VTRN, right result
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+ EVT VT = OpLHS.getValueType();
+
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown shuffle opcode!");
+ case OP_VREV:
+ // VREV divides the vector in half and swaps within the half.
+ if (VT.getVectorElementType() == MVT::i32 ||
+ VT.getVectorElementType() == MVT::f32)
+ return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
+ // vrev <4 x i16> -> VREV32
+ if (VT.getVectorElementType() == MVT::i16)
+ return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
+ // vrev <4 x i8> -> VREV16
+ assert(VT.getVectorElementType() == MVT::i8);
+ return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
+ case OP_VDUP0:
+ case OP_VDUP1:
+ case OP_VDUP2:
+ case OP_VDUP3:
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
+ case OP_VEXT1:
+ case OP_VEXT2:
+ case OP_VEXT3:
+ return DAG.getNode(ARMISD::VEXT, dl, VT,
+ OpLHS, OpRHS,
+ DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
+ case OP_VUZPL:
+ case OP_VUZPR:
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
+ case OP_VZIPL:
+ case OP_VZIPR:
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
+ case OP_VTRNL:
+ case OP_VTRNR:
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
+ }
+}
+
+static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
+ ArrayRef<int> ShuffleMask,
+ SelectionDAG &DAG) {
+ // Check to see if we can use the VTBL instruction.
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDLoc DL(Op);
+
+ SmallVector<SDValue, 8> VTBLMask;
+ for (ArrayRef<int>::iterator
+ I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
+ VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
+
+ if (V2.getNode()->getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
+
+ return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
+}
+
+static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ SDValue OpLHS = Op.getOperand(0);
+ EVT VT = OpLHS.getValueType();
+
+ assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ "Expect an v8i16/v16i8 type");
+ OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
+ // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
+ // extract the first 8 bytes into the top double word and the last 8 bytes
+ // into the bottom double word. The v8i16 case is similar.
+ unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
+ return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
+ DAG.getConstant(ExtractNum, DL, MVT::i32));
+}
+
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+
+ // Convert shuffles that are directly supported on NEON to target-specific
+ // DAG nodes, instead of keeping them as shuffles and matching them again
+ // during code selection. This is more efficient and avoids the possibility
+ // of inconsistencies between legalization and selection.
+ // FIXME: floating-point vectors should be canonicalized to integer vectors
+ // of the same time so that they get CSEd properly.
+ ArrayRef<int> ShuffleMask = SVN->getMask();
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize <= 32) {
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane == -1) Lane = 0;
+
+ // Test if V1 is a SCALAR_TO_VECTOR.
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
+ // (and probably will turn into a SCALAR_TO_VECTOR once legalization
+ // reaches it).
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(V1.getOperand(0))) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, dl, MVT::i32));
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm;
+ if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+ if (ReverseVEXT)
+ std::swap(V1, V2);
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
+ DAG.getConstant(Imm, dl, MVT::i32));
+ }
+
+ if (isVREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+
+ if (V2->getOpcode() == ISD::UNDEF &&
+ isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
+ DAG.getConstant(Imm, dl, MVT::i32));
+ }
+
+ // Check for Neon shuffles that modify both input vectors in place.
+ // If both results are used, i.e., if there are two shuffles with the same
+ // source operands and with masks corresponding to both results of one of
+ // these operations, DAG memoization will ensure that a single node is
+ // used for both shuffles.
+ unsigned WhichResult;
+ bool isV_UNDEF;
+ if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
+ ShuffleMask, VT, WhichResult, isV_UNDEF)) {
+ if (isV_UNDEF)
+ V2 = V1;
+ return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
+ .getValue(WhichResult);
+ }
+
+ // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
+ // shuffles that produce a result larger than their operands with:
+ // shuffle(concat(v1, undef), concat(v2, undef))
+ // ->
+ // shuffle(concat(v1, v2), undef)
+ // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
+ //
+ // This is useful in the general case, but there are special cases where
+ // native shuffles produce larger results: the two-result ops.
+ //
+ // Look through the concat when lowering them:
+ // shuffle(concat(v1, v2), undef)
+ // ->
+ // concat(VZIP(v1, v2):0, :1)
+ //
+ if (V1->getOpcode() == ISD::CONCAT_VECTORS &&
+ V2->getOpcode() == ISD::UNDEF) {
+ SDValue SubV1 = V1->getOperand(0);
+ SDValue SubV2 = V1->getOperand(1);
+ EVT SubVT = SubV1.getValueType();
+
+ // We expect these to have been canonicalized to -1.
+ assert(std::all_of(ShuffleMask.begin(), ShuffleMask.end(), [&](int i) {
+ return i < (int)VT.getVectorNumElements();
+ }) && "Unexpected shuffle index into UNDEF operand!");
+
+ if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
+ ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
+ if (isV_UNDEF)
+ SubV2 = SubV1;
+ assert((WhichResult == 0) &&
+ "In-place shuffle of concat can only have one result!");
+ SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
+ SubV1, SubV2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
+ Res.getValue(1));
+ }
+ }
+ }
+
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 4) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (ShuffleMask[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = ShuffleMask[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (ShuffleMask[i] < 0)
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ dl, MVT::i32)));
+ }
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+
+ if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
+ return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
+
+ if (VT == MVT::v8i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ // INSERT_VECTOR_ELT is legal only for immediate indexes.
+ SDValue Lane = Op.getOperand(2);
+ if (!isa<ConstantSDNode>(Lane))
+ return SDValue();
+
+ return Op;
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
+ SDValue Lane = Op.getOperand(1);
+ if (!isa<ConstantSDNode>(Lane))
+ return SDValue();
+
+ SDValue Vec = Op.getOperand(0);
+ if (Op.getValueType() == MVT::i32 &&
+ Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
+ SDLoc dl(Op);
+ return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
+ }
+
+ return Op;
+}
+
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
+ "unexpected CONCAT_VECTORS");
+ SDLoc dl(Op);
+ SDValue Val = DAG.getUNDEF(MVT::v2f64);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
+ DAG.getIntPtrConstant(0, dl));
+ if (Op1.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
+ DAG.getIntPtrConstant(1, dl));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
+}
+
+/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
+/// element has been zero/sign-extended, depending on the isSigned parameter,
+/// from an integer type half its size.
+static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
+ bool isSigned) {
+ // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ if (BVN->getValueType(0) != MVT::v4i32 ||
+ BVN->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
+ unsigned HiElt = 1 - LoElt;
+ ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
+ ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
+ ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
+ ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
+ if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
+ return false;
+ if (isSigned) {
+ if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
+ Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
+ return true;
+ } else {
+ if (Hi0->isNullValue() && Hi1->isNullValue())
+ return true;
+ }
+ return false;
+ }
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned HalfSize = EltSize / 2;
+ if (isSigned) {
+ if (!isIntN(HalfSize, C->getSExtValue()))
+ return false;
+ } else {
+ if (!isUIntN(HalfSize, C->getZExtValue()))
+ return false;
+ }
+ continue;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+/// isSignExtended - Check if a node is a vector value that is sign-extended
+/// or a constant BUILD_VECTOR with sign-extended elements.
+static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, true))
+ return true;
+ return false;
+}
+
+/// isZeroExtended - Check if a node is a vector value that is zero-extended
+/// or a constant BUILD_VECTOR with zero-extended elements.
+static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, false))
+ return true;
+ return false;
+}
+
+static EVT getExtensionTo64Bits(const EVT &OrigVT) {
+ if (OrigVT.getSizeInBits() >= 64)
+ return OrigVT;
+
+ assert(OrigVT.isSimple() && "Expecting a simple value type");
+
+ MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
+ switch (OrigSimpleTy) {
+ default: llvm_unreachable("Unexpected Vector Type");
+ case MVT::v2i8:
+ case MVT::v2i16:
+ return MVT::v2i32;
+ case MVT::v4i8:
+ return MVT::v4i16;
+ }
+}
+
+/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
+/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
+/// We insert the required extension here to get the vector to fill a D register.
+static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
+ const EVT &OrigTy,
+ const EVT &ExtTy,
+ unsigned ExtOpcode) {
+ // The vector originally had a size of OrigTy. It was then extended to ExtTy.
+ // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
+ // 64-bits we need to insert a new extension so that it will be 64-bits.
+ assert(ExtTy.is128BitVector() && "Unexpected extension size");
+ if (OrigTy.getSizeInBits() >= 64)
+ return N;
+
+ // Must extend size to at least 64 bits to be used as an operand for VMULL.
+ EVT NewVT = getExtensionTo64Bits(OrigTy);
+
+ return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
+}
+
+/// SkipLoadExtensionForVMULL - return a load of the original vector size that
+/// does not do any sign/zero extension. If the original vector is less
+/// than 64 bits, an appropriate extension will be added after the load to
+/// reach a total size of 64 bits. We have to add the extension separately
+/// because ARM does not have a sign/zero extending load for vectors.
+static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
+ EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
+
+ // The load already has the right type.
+ if (ExtendedTy == LD->getMemoryVT())
+ return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
+
+ // We need to create a zextload/sextload. We cannot just create a load
+ // followed by a zext/zext node because LowerMUL is also run during normal
+ // operation legalization where we can't create illegal types.
+ return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
+ LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(),
+ LD->isNonTemporal(), LD->getAlignment());
+}
+
+/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
+/// extending load, or BUILD_VECTOR with extended elements, return the
+/// unextended value. The unextended vector should be 64 bits so that it can
+/// be used as an operand to a VMULL instruction. If the original vector size
+/// before extension is less than 64 bits we add a an extension to resize
+/// the vector to 64 bits.
+static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
+ N->getOperand(0)->getValueType(0),
+ N->getValueType(0),
+ N->getOpcode());
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return SkipLoadExtensionForVMULL(LD, DAG);
+
+ // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
+ // have been legalized as a BITCAST from v4i32.
+ if (N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
+ BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
+ unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,
+ BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
+ }
+ // Construct a new BUILD_VECTOR with elements truncated to half the size.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
+ EVT VT = N->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT TruncVT = MVT::getIntegerVT(EltSize);
+ SmallVector<SDValue, 8> Ops;
+ SDLoc dl(N);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
+ const APInt &CInt = C->getAPIntValue();
+ // Element types smaller than 32 bits are not legal, so use i32 elements.
+ // The values are implicitly truncated so sext vs. zext doesn't matter.
+ Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::getVectorVT(TruncVT, NumElts), Ops);
+}
+
+static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
+ }
+ return false;
+}
+
+static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
+ }
+ return false;
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && VT.isInteger() &&
+ "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ bool isMLA = false;
+ bool isN0SExt = isSignExtended(N0, DAG);
+ bool isN1SExt = isSignExtended(N1, DAG);
+ if (isN0SExt && isN1SExt)
+ NewOpc = ARMISD::VMULLs;
+ else {
+ bool isN0ZExt = isZeroExtended(N0, DAG);
+ bool isN1ZExt = isZeroExtended(N1, DAG);
+ if (isN0ZExt && isN1ZExt)
+ NewOpc = ARMISD::VMULLu;
+ else if (isN1SExt || isN1ZExt) {
+ // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
+ // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
+ if (isN1SExt && isAddSubSExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLs;
+ isMLA = true;
+ } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
+ std::swap(N0, N1);
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ }
+ }
+
+ if (!NewOpc) {
+ if (VT == MVT::v2i64)
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ else
+ // Other vector multiplications are legal.
+ return Op;
+ }
+ }
+
+ // Legalize to a VMULL instruction.
+ SDLoc DL(Op);
+ SDValue Op0;
+ SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
+ if (!isMLA) {
+ Op0 = SkipExtensionForVMULL(N0, DAG);
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+ }
+
+ // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
+ // isel lowering to take advantage of no-stall back to back vmul + vmla.
+ // vmull q0, d4, d6
+ // vmlal q0, d5, d6
+ // is faster than
+ // vaddl q0, d4, d5
+ // vmovl q1, d6
+ // vmul q0, q0, q1
+ SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
+ SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
+ EVT Op1VT = Op1.getValueType();
+ return DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
+}
+
+static SDValue
+LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
+
+ // Convert to float
+ // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
+ // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
+ X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
+ Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
+ // Get reciprocal estimate.
+ // float4 recip = vrecpeq_f32(yf);
+ Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
+ Y);
+ // Because char has a smaller range than uchar, we can actually get away
+ // without any newton steps. This requires that we use a weird bias
+ // of 0xb000, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
+ X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
+ Y = DAG.getConstant(0xb000, dl, MVT::i32);
+ Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+ X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
+ // Convert back to short.
+ X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
+ X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
+ return X;
+}
+
+static SDValue
+LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
+
+ SDValue N2;
+ // Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_s16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_s16(x));
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and one refinement step.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
+ N1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
+ N1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Because short has a smaller range than ushort, we can actually get away
+ // with only a single newton step. This requires that we use a weird bias
+ // of 89, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0x89);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(0x89, dl, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_s32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::SDIV");
+
+ SDLoc dl(Op);
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4, dl));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4, dl));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0, dl));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0, dl));
+
+ N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
+ return N0;
+ }
+ return LowerSDIV_v4i16(N0, N1, dl, DAG);
+}
+
+static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::UDIV");
+
+ SDLoc dl(Op);
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4, dl));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4, dl));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0, dl));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0, dl));
+
+ N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
+ DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
+ MVT::i32),
+ N0);
+ return N0;
+ }
+
+ // v4i16 sdiv ... Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_u16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_u16(x));
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and two refinement steps.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
+ BN1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
+ BN1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
+ BN1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Simply multiplying by the reciprocal estimate can leave us a few ulps
+ // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
+ // and that it will never cause us to return an answer too large).
+ // float4 result = as_float4(as_int4(xf*recip) + 2);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(2, dl, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_u32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getNode()->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Invalid code");
+ case ISD::ADDC: Opc = ARMISD::ADDC; break;
+ case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
+ case ISD::SUBC: Opc = ARMISD::SUBC; break;
+ case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
+ }
+
+ if (!ExtraOp)
+ return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+}
+
+SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin());
+
+ // For iOS, we want to call an alternative entry point: __sincos_stret,
+ // return values are passed via sret.
+ SDLoc dl(Op);
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Pair of floats / doubles used to pass the result.
+ Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
+ auto &DL = DAG.getDataLayout();
+
+ ArgListTy Args;
+ bool ShouldUseSRet = Subtarget->isAPCS_ABI();
+ SDValue SRet;
+ if (ShouldUseSRet) {
+ // Create stack object for sret.
+ const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
+ const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
+
+ ArgListEntry Entry;
+ Entry.Node = SRet;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isSRet = true;
+ Args.push_back(Entry);
+ RetTy = Type::getVoidTy(*DAG.getContext());
+ }
+
+ ArgListEntry Entry;
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ const char *LibcallName =
+ (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
+ RTLIB::Libcall LC =
+ (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
+ CallingConv::ID CC = getLibcallCallingConv(LC);
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setCallee(CC, RetTy, Callee, std::move(Args), 0)
+ .setDiscardResult(ShouldUseSRet);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ if (!ShouldUseSRet)
+ return CallResult.first;
+
+ SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
+ MachinePointerInfo(), false, false, false, 0);
+
+ // Address of cos field.
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
+ DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
+ SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
+ MachinePointerInfo(), false, false, false, 0);
+
+ SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
+ LoadSin.getValue(0), LoadCos.getValue(0));
+}
+
+SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
+ bool Signed,
+ SDValue &Chain) const {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::i32 || VT == MVT::i64) &&
+ "unexpected type for custom lowering DIV");
+ SDLoc dl(Op);
+
+ const auto &DL = DAG.getDataLayout();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ const char *Name = nullptr;
+ if (Signed)
+ Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
+ else
+ Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
+
+ SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
+
+ ARMTargetLowering::ArgListTy Args;
+
+ for (auto AI : {1, 0}) {
+ ArgListEntry Arg;
+ Arg.Node = Op.getOperand(AI);
+ Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Args.push_back(Arg);
+ }
+
+ CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
+ ES, std::move(Args), 0);
+
+ return LowerCallTo(CLI).first;
+}
+
+SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
+ assert(Op.getValueType() == MVT::i32 &&
+ "unexpected type for custom lowering DIV");
+ SDLoc dl(Op);
+
+ SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
+ DAG.getEntryNode(), Op.getOperand(1));
+
+ return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
+}
+
+void ARMTargetLowering::ExpandDIV_Windows(
+ SDValue Op, SelectionDAG &DAG, bool Signed,
+ SmallVectorImpl<SDValue> &Results) const {
+ const auto &DL = DAG.getDataLayout();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ assert(Op.getValueType() == MVT::i64 &&
+ "unexpected type for custom lowering DIV");
+ SDLoc dl(Op);
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(1, dl, MVT::i32));
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, Lo, Hi);
+
+ SDValue DBZCHK =
+ DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Or);
+
+ SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
+
+ SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
+ SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
+ DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
+ Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
+
+ Results.push_back(Lower);
+ Results.push_back(Upper);
+}
+
+static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
+ // Monotonic load/store is legal for all targets
+ if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
+ return Op;
+
+ // Acquire/Release load/store is not legal for targets without a
+ // dmb or equivalent available.
+ return SDValue();
+}
+
+static void ReplaceREADCYCLECOUNTER(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ SDLoc DL(N);
+ // Under Power Management extensions, the cycle-count is:
+ // mrc p15, #0, <Rt>, c9, c13, #0
+ SDValue Ops[] = { N->getOperand(0), // Chain
+ DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+ DAG.getConstant(15, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(9, DL, MVT::i32),
+ DAG.getConstant(13, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32)
+ };
+
+ SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
+ DAG.getConstant(0, DL, MVT::i32)));
+ Results.push_back(Cycles32.getValue(1));
+}
+
+SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalAddress:
+ switch (Subtarget->getTargetTriple().getObjectFormat()) {
+ default: llvm_unreachable("unknown object format");
+ case Triple::COFF:
+ return LowerGlobalAddressWindows(Op, DAG);
+ case Triple::ELF:
+ return LowerGlobalAddressELF(Op, DAG);
+ case Triple::MachO:
+ return LowerGlobalAddressDarwin(Op, DAG);
+ }
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
+ case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
+ Subtarget);
+ case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
+ case ISD::SREM: return LowerREM(Op.getNode(), DAG);
+ case ISD::UREM: return LowerREM(Op.getNode(), DAG);
+ case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
+ case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
+ case ISD::SETCC: return LowerVSETCC(Op, DAG);
+ case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::UDIV: return LowerUDIV(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ return LowerXALUO(Op, DAG);
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: return LowerDivRem(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+ return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ARMISD::WIN__DBZCHK: return SDValue();
+ }
+}
+
+/// ReplaceNodeResults - Replace the results of node with an illegal result
+/// type with new values built out of custom code.
+void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ SDValue Res;
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to custom expand this!");
+ case ISD::READ_REGISTER:
+ ExpandREAD_REGISTER(N, Results, DAG);
+ break;
+ case ISD::BITCAST:
+ Res = ExpandBITCAST(N, DAG);
+ break;
+ case ISD::SRL:
+ case ISD::SRA:
+ Res = Expand64BitShift(N, DAG, Subtarget);
+ break;
+ case ISD::SREM:
+ case ISD::UREM:
+ Res = LowerREM(N, DAG);
+ break;
+ case ISD::READCYCLECOUNTER:
+ ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
+ return;
+ case ISD::UDIV:
+ case ISD::SDIV:
+ assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
+ return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
+ Results);
+ }
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
+/// registers the function context.
+void ARMTargetLowering::
+SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB, int FI) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ MachineConstantPool *MCP = MF->getConstantPool();
+ ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+ const Function *F = MF->getFunction();
+
+ bool isThumb = Subtarget->isThumb();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ unsigned PCLabelId = AFI->createPICLabelUId();
+ unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
+ unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
+
+ const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
+
+ // Grab constant pool and fixed stack memory operands.
+ MachineMemOperand *CPMMO =
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+ MachineMemOperand::MOLoad, 4, 4);
+
+ MachineMemOperand *FIMMOSt =
+ MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
+ MachineMemOperand::MOStore, 4, 4);
+
+ // Load the address of the dispatch MBB into the jump buffer.
+ if (isThumb2) {
+ // Incoming value: jbuf
+ // ldr.n r5, LCPI1_1
+ // orr r5, r5, #1
+ // add r5, pc
+ // str r5, [$jbuf, #+4] ; &jbuf[1]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO));
+ // Set the low bit because of thumb mode.
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(0x01)));
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
+ .addReg(NewVReg2, RegState::Kill)
+ .addImm(PCLabelId);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
+ .addReg(NewVReg3, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt));
+ } else if (isThumb) {
+ // Incoming value: jbuf
+ // ldr.n r1, LCPI1_4
+ // add r1, pc
+ // mov r2, #1
+ // orrs r1, r2
+ // add r2, $jbuf, #+4 ; &jbuf[1]
+ // str r1, [r2]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO));
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId);
+ // Set the low bit because of thumb mode.
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addImm(1));
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3, RegState::Kill));
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
+ .addFrameIndex(FI)
+ .addImm(36); // &jbuf[1] :: pc
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg5, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(FIMMOSt));
+ } else {
+ // Incoming value: jbuf
+ // ldr r1, LCPI1_1
+ // add r1, pc, r1
+ // str r1, [$jbuf, #+4] ; &jbuf[1]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addImm(0)
+ .addMemOperand(CPMMO));
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId));
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
+ .addReg(NewVReg2, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt));
+ }
+}
+
+void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ int FI = MFI->getFunctionContextIndex();
+
+ const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
+ : &ARM::GPRnopcRegClass;
+
+ // Get a mapping of the call site numbers to all of the landing pads they're
+ // associated with.
+ DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
+ unsigned MaxCSNum = 0;
+ MachineModuleInfo &MMI = MF->getMMI();
+ for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
+ ++BB) {
+ if (!BB->isEHPad()) continue;
+
+ // FIXME: We should assert that the EH_LABEL is the first MI in the landing
+ // pad.
+ for (MachineBasicBlock::iterator
+ II = BB->begin(), IE = BB->end(); II != IE; ++II) {
+ if (!II->isEHLabel()) continue;
+
+ MCSymbol *Sym = II->getOperand(0).getMCSymbol();
+ if (!MMI.hasCallSiteLandingPad(Sym)) continue;
+
+ SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
+ for (SmallVectorImpl<unsigned>::iterator
+ CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
+ CSI != CSE; ++CSI) {
+ CallSiteNumToLPad[*CSI].push_back(&*BB);
+ MaxCSNum = std::max(MaxCSNum, *CSI);
+ }
+ break;
+ }
+ }
+
+ // Get an ordered list of the machine basic blocks for the jump table.
+ std::vector<MachineBasicBlock*> LPadList;
+ SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
+ LPadList.reserve(CallSiteNumToLPad.size());
+ for (unsigned I = 1; I <= MaxCSNum; ++I) {
+ SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
+ for (SmallVectorImpl<MachineBasicBlock*>::iterator
+ II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
+ LPadList.push_back(*II);
+ InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
+ }
+ }
+
+ assert(!LPadList.empty() &&
+ "No landing pad destinations for the dispatch jump table!");
+
+ // Create the jump table and associated information.
+ MachineJumpTableInfo *JTI =
+ MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
+ unsigned MJTI = JTI->createJumpTableIndex(LPadList);
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+
+ // Create the MBBs for the dispatch code.
+
+ // Shove the dispatch's address into the return slot in the function context.
+ MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
+ DispatchBB->setIsEHPad();
+
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ unsigned trap_opcode;
+ if (Subtarget->isThumb())
+ trap_opcode = ARM::tTRAP;
+ else
+ trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
+
+ BuildMI(TrapBB, dl, TII->get(trap_opcode));
+ DispatchBB->addSuccessor(TrapBB);
+
+ MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
+ DispatchBB->addSuccessor(DispContBB);
+
+ // Insert and MBBs.
+ MF->insert(MF->end(), DispatchBB);
+ MF->insert(MF->end(), DispContBB);
+ MF->insert(MF->end(), TrapBB);
+
+ // Insert code into the entry block that creates and registers the function
+ // context.
+ SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
+
+ MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI),
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);
+
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
+
+ const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+
+ // Add a register mask with no preserved registers. This results in all
+ // registers being marked as clobbered.
+ MIB.addRegMask(RI.getNoPreservedMask());
+
+ unsigned NumLPads = LPadList.size();
+ if (Subtarget->isThumb2()) {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd));
+
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ } else {
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF));
+
+ unsigned VReg2 = VReg1;
+ if ((NumLPads & 0xFFFF0000) != 0) {
+ VReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16));
+ }
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2));
+ }
+
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
+ .addJumpTableIndex(MJTI));
+
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(
+ BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+
+ BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg1)
+ .addJumpTableIndex(MJTI);
+ } else if (Subtarget->isThumb()) {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(1)
+ .addMemOperand(FIMMOLd));
+
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+ .addReg(NewVReg1)
+ .addImm(NumLPads));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = MF->getDataLayout().getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
+ .addReg(NewVReg1)
+ .addReg(VReg1));
+ }
+
+ BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg1)
+ .addImm(2));
+
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI));
+
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3));
+
+ MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
+ MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
+
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
+ .addReg(NewVReg4, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(JTMMOLd));
+
+ unsigned NewVReg6 = NewVReg5;
+ if (RelocM == Reloc::PIC_) {
+ NewVReg6 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg3));
+ }
+
+ BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
+ .addReg(NewVReg6, RegState::Kill)
+ .addJumpTableIndex(MJTI);
+ } else {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd));
+
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+ .addReg(NewVReg1)
+ .addImm(NumLPads));
+ } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF));
+
+ unsigned VReg2 = VReg1;
+ if ((NumLPads & 0xFFFF0000) != 0) {
+ VReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16));
+ }
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = MF->getDataLayout().getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg1, RegState::Kill));
+ }
+
+ BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
+ .addJumpTableIndex(MJTI));
+
+ MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
+ MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(
+ BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg4)
+ .addImm(0)
+ .addMemOperand(JTMMOLd));
+
+ if (RelocM == Reloc::PIC_) {
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg4)
+ .addJumpTableIndex(MJTI);
+ } else {
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
+ .addReg(NewVReg5, RegState::Kill)
+ .addJumpTableIndex(MJTI);
+ }
+ }
+
+ // Add the jump table entries as successors to the MBB.
+ SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
+ for (std::vector<MachineBasicBlock*>::iterator
+ I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
+ MachineBasicBlock *CurMBB = *I;
+ if (SeenMBBs.insert(CurMBB).second)
+ DispContBB->addSuccessor(CurMBB);
+ }
+
+ // N.B. the order the invoke BBs are processed in doesn't matter here.
+ const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
+ SmallVector<MachineBasicBlock*, 64> MBBLPads;
+ for (MachineBasicBlock *BB : InvokeBBs) {
+
+ // Remove the landing pad successor from the invoke block and replace it
+ // with the new dispatch block.
+ SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+ while (!Successors.empty()) {
+ MachineBasicBlock *SMBB = Successors.pop_back_val();
+ if (SMBB->isEHPad()) {
+ BB->removeSuccessor(SMBB);
+ MBBLPads.push_back(SMBB);
+ }
+ }
+
+ BB->addSuccessor(DispatchBB, BranchProbability::getZero());
+ BB->normalizeSuccProbs();
+
+ // Find the invoke call and mark all of the callee-saved registers as
+ // 'implicit defined' so that they're spilled. This prevents code from
+ // moving instructions to before the EH block, where they will never be
+ // executed.
+ for (MachineBasicBlock::reverse_iterator
+ II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
+ if (!II->isCall()) continue;
+
+ DenseMap<unsigned, bool> DefRegs;
+ for (MachineInstr::mop_iterator
+ OI = II->operands_begin(), OE = II->operands_end();
+ OI != OE; ++OI) {
+ if (!OI->isReg()) continue;
+ DefRegs[OI->getReg()] = true;
+ }
+
+ MachineInstrBuilder MIB(*MF, &*II);
+
+ for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
+ unsigned Reg = SavedRegs[i];
+ if (Subtarget->isThumb2() &&
+ !ARM::tGPRRegClass.contains(Reg) &&
+ !ARM::hGPRRegClass.contains(Reg))
+ continue;
+ if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
+ continue;
+ if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
+ continue;
+ if (!DefRegs[Reg])
+ MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
+ }
+
+ break;
+ }
+ }
+
+ // Mark all former landing pads as non-landing pads. The dispatch is the only
+ // landing pad now.
+ for (SmallVectorImpl<MachineBasicBlock*>::iterator
+ I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
+ (*I)->setIsEHPad(false);
+
+ // The instruction is gone now.
+ MI->eraseFromParent();
+}
+
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I != Succ)
+ return *I;
+ llvm_unreachable("Expecting a BB with two successors!");
+}
+
+/// Return the load opcode for a given load size. If load size >= 8,
+/// neon opcode will be returned.
+static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
+ if (LdSize >= 8)
+ return LdSize == 16 ? ARM::VLD1q32wb_fixed
+ : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
+ if (IsThumb1)
+ return LdSize == 4 ? ARM::tLDRi
+ : LdSize == 2 ? ARM::tLDRHi
+ : LdSize == 1 ? ARM::tLDRBi : 0;
+ if (IsThumb2)
+ return LdSize == 4 ? ARM::t2LDR_POST
+ : LdSize == 2 ? ARM::t2LDRH_POST
+ : LdSize == 1 ? ARM::t2LDRB_POST : 0;
+ return LdSize == 4 ? ARM::LDR_POST_IMM
+ : LdSize == 2 ? ARM::LDRH_POST
+ : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
+}
+
+/// Return the store opcode for a given store size. If store size >= 8,
+/// neon opcode will be returned.
+static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
+ if (StSize >= 8)
+ return StSize == 16 ? ARM::VST1q32wb_fixed
+ : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
+ if (IsThumb1)
+ return StSize == 4 ? ARM::tSTRi
+ : StSize == 2 ? ARM::tSTRHi
+ : StSize == 1 ? ARM::tSTRBi : 0;
+ if (IsThumb2)
+ return StSize == 4 ? ARM::t2STR_POST
+ : StSize == 2 ? ARM::t2STRH_POST
+ : StSize == 1 ? ARM::t2STRB_POST : 0;
+ return StSize == 4 ? ARM::STR_POST_IMM
+ : StSize == 2 ? ARM::STRH_POST
+ : StSize == 1 ? ARM::STRB_POST_IMM : 0;
+}
+
+/// Emit a post-increment load operation with given size. The instructions
+/// will be added to BB at Pos.
+static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned LdSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
+ assert(LdOpc != 0 && "Should have a load opcode");
+ if (LdSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(0));
+ } else if (IsThumb1) {
+ // load + update AddrIn
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrIn).addImm(0));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(AddrIn).addImm(LdSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(LdSize));
+ } else { // arm
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addReg(0).addImm(LdSize));
+ }
+}
+
+/// Emit a post-increment store operation with given size. The instructions
+/// will be added to BB at Pos.
+static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned StSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
+ assert(StOpc != 0 && "Should have a store opcode");
+ if (StSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(AddrIn).addImm(0).addReg(Data));
+ } else if (IsThumb1) {
+ // store + update AddrIn
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
+ .addReg(AddrIn).addImm(0));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(AddrIn).addImm(StSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addImm(StSize));
+ } else { // arm
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addReg(0)
+ .addImm(StSize));
+ }
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ // This pseudo instruction has 3 operands: dst, src, size
+ // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
+ // Otherwise, we will generate unrolled scalar copies.
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned src = MI->getOperand(1).getReg();
+ unsigned SizeVal = MI->getOperand(2).getImm();
+ unsigned Align = MI->getOperand(3).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned UnitSize = 0;
+ const TargetRegisterClass *TRC = nullptr;
+ const TargetRegisterClass *VecTRC = nullptr;
+
+ bool IsThumb1 = Subtarget->isThumb1Only();
+ bool IsThumb2 = Subtarget->isThumb2();
+
+ if (Align & 1) {
+ UnitSize = 1;
+ } else if (Align & 2) {
+ UnitSize = 2;
+ } else {
+ // Check whether we can use NEON instructions.
+ if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
+ Subtarget->hasNEON()) {
+ if ((Align % 16 == 0) && SizeVal >= 16)
+ UnitSize = 16;
+ else if ((Align % 8 == 0) && SizeVal >= 8)
+ UnitSize = 8;
+ }
+ // Can't use NEON instructions.
+ if (UnitSize == 0)
+ UnitSize = 4;
+ }
+
+ // Select the correct opcode and register class for unit size load/store
+ bool IsNeon = UnitSize >= 8;
+ TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
+ if (IsNeon)
+ VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
+ : UnitSize == 8 ? &ARM::DPRRegClass
+ : nullptr;
+
+ unsigned BytesLeft = SizeVal % UnitSize;
+ unsigned LoopSize = SizeVal - BytesLeft;
+
+ if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
+ // Use LDR and STR to copy.
+ // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
+ // [destOut] = STR_POST(scratch, destIn, UnitSize)
+ unsigned srcIn = src;
+ unsigned destIn = dest;
+ for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+
+ // Handle the leftover bytes with LDRB and STRB.
+ // [scratch, srcOut] = LDRB_POST(srcIn, 1)
+ // [destOut] = STRB_POST(scratch, destIn, 1)
+ for (unsigned i = 0; i < BytesLeft; i++) {
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+ MI->eraseFromParent(); // The instruction is gone now.
+ return BB;
+ }
+
+ // Expand the pseudo op to a loop.
+ // thisMBB:
+ // ...
+ // movw varEnd, # --> with thumb2
+ // movt varEnd, #
+ // ldrcp varEnd, idx --> without thumb2
+ // fallthrough --> loopMBB
+ // loopMBB:
+ // PHI varPhi, varEnd, varLoop
+ // PHI srcPhi, src, srcLoop
+ // PHI destPhi, dst, destLoop
+ // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
+ // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
+ // subs varLoop, varPhi, #UnitSize
+ // bne loopMBB
+ // fallthrough --> exitMBB
+ // exitMBB:
+ // epilogue to handle left-over bytes
+ // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
+ // [destOut] = STRB_POST(scratch, destLoop, 1)
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Load an immediate to varEnd.
+ unsigned varEnd = MRI.createVirtualRegister(TRC);
+ if (Subtarget->useMovt(*MF)) {
+ unsigned Vtmp = varEnd;
+ if ((LoopSize & 0xFFFF0000) != 0)
+ Vtmp = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16),
+ Vtmp).addImm(LoopSize & 0xFFFF));
+
+ if ((LoopSize & 0xFFFF0000) != 0)
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16),
+ varEnd)
+ .addReg(Vtmp)
+ .addImm(LoopSize >> 16));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = MF->getDataLayout().getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ if (IsThumb1)
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx));
+ else
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
+ }
+ BB->addSuccessor(loopMBB);
+
+ // Generate the loop body:
+ // varPhi = PHI(varLoop, varEnd)
+ // srcPhi = PHI(srcLoop, src)
+ // destPhi = PHI(destLoop, dst)
+ MachineBasicBlock *entryBB = BB;
+ BB = loopMBB;
+ unsigned varLoop = MRI.createVirtualRegister(TRC);
+ unsigned varPhi = MRI.createVirtualRegister(TRC);
+ unsigned srcLoop = MRI.createVirtualRegister(TRC);
+ unsigned srcPhi = MRI.createVirtualRegister(TRC);
+ unsigned destLoop = MRI.createVirtualRegister(TRC);
+ unsigned destPhi = MRI.createVirtualRegister(TRC);
+
+ BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
+ .addReg(varLoop).addMBB(loopMBB)
+ .addReg(varEnd).addMBB(entryBB);
+ BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
+ .addReg(srcLoop).addMBB(loopMBB)
+ .addReg(src).addMBB(entryBB);
+ BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
+ .addReg(destLoop).addMBB(loopMBB)
+ .addReg(dest).addMBB(entryBB);
+
+ // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
+ // [destLoop] = STR_POST(scratch, destPhi, UnitSiz)
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
+ IsThumb1, IsThumb2);
+
+ // Decrement loop variable by UnitSize.
+ if (IsThumb1) {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(varPhi).addImm(UnitSize);
+ AddDefaultPred(MIB);
+ } else {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+ }
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ // loopMBB can loop back to loopMBB or fall through to exitMBB.
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // Add epilogue to handle BytesLeft.
+ BB = exitMBB;
+ MachineInstr *StartOfExit = exitMBB->begin();
+
+ // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
+ // [destOut] = STRB_POST(scratch, destLoop, 1)
+ unsigned srcIn = srcLoop;
+ unsigned destIn = destLoop;
+ for (unsigned i = 0; i < BytesLeft; i++) {
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+
+ MI->eraseFromParent(); // The instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetMachine &TM = getTargetMachine();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ assert(Subtarget->isTargetWindows() &&
+ "__chkstk is only supported on Windows");
+ assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
+
+ // __chkstk takes the number of words to allocate on the stack in R4, and
+ // returns the stack adjustment in number of bytes in R4. This will not
+ // clober any other registers (other than the obvious lr).
+ //
+ // Although, technically, IP should be considered a register which may be
+ // clobbered, the call itself will not touch it. Windows on ARM is a pure
+ // thumb-2 environment, so there is no interworking required. As a result, we
+ // do not expect a veneer to be emitted by the linker, clobbering IP.
+ //
+ // Each module receives its own copy of __chkstk, so no import thunk is
+ // required, again, ensuring that IP is not clobbered.
+ //
+ // Finally, although some linkers may theoretically provide a trampoline for
+ // out of range calls (which is quite common due to a 32M range limitation of
+ // branches for Thumb), we can generate the long-call version via
+ // -mcmodel=large, alleviating the need for the trampoline which may clobber
+ // IP.
+
+ switch (TM.getCodeModel()) {
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Default:
+ case CodeModel::Kernel:
+ BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ break;
+ case CodeModel::Large:
+ case CodeModel::JITDefault: {
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
+
+ BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
+ .addExternalSymbol("__chkstk");
+ BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ break;
+ }
+ }
+
+ AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
+ ARM::SP)
+ .addReg(ARM::SP).addReg(ARM::R4)));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+
+ MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
+ MF->push_back(ContBB);
+ ContBB->splice(ContBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ MBB->addSuccessor(ContBB);
+
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ MF->push_back(TrapBB);
+ BuildMI(TrapBB, DL, TII->get(ARM::t2UDF)).addImm(249);
+ MBB->addSuccessor(TrapBB);
+
+ BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ))
+ .addReg(MI->getOperand(0).getReg())
+ .addMBB(TrapBB);
+
+ MI->eraseFromParent();
+ return ContBB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+ switch (MI->getOpcode()) {
+ default: {
+ MI->dump();
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+ // The Thumb2 pre-indexed stores have the same MI operands, they just
+ // define them differently in the .td files from the isel patterns, so
+ // they need pseudos.
+ case ARM::t2STR_preidx:
+ MI->setDesc(TII->get(ARM::t2STR_PRE));
+ return BB;
+ case ARM::t2STRB_preidx:
+ MI->setDesc(TII->get(ARM::t2STRB_PRE));
+ return BB;
+ case ARM::t2STRH_preidx:
+ MI->setDesc(TII->get(ARM::t2STRH_PRE));
+ return BB;
+
+ case ARM::STRi_preidx:
+ case ARM::STRBi_preidx: {
+ unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
+ ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
+ // Decode the offset.
+ unsigned Offset = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
+ Offset = ARM_AM::getAM2Offset(Offset);
+ if (isSub)
+ Offset = -Offset;
+
+ MachineMemOperand *MMO = *MI->memoperands_begin();
+ BuildMI(*BB, MI, dl, TII->get(NewOpc))
+ .addOperand(MI->getOperand(0)) // Rn_wb
+ .addOperand(MI->getOperand(1)) // Rt
+ .addOperand(MI->getOperand(2)) // Rn
+ .addImm(Offset) // offset (skip GPR==zero_reg)
+ .addOperand(MI->getOperand(5)) // pred
+ .addOperand(MI->getOperand(6))
+ .addMemOperand(MMO);
+ MI->eraseFromParent();
+ return BB;
+ }
+ case ARM::STRr_preidx:
+ case ARM::STRBr_preidx:
+ case ARM::STRH_preidx: {
+ unsigned NewOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
+ case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
+ case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
+ }
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i)
+ MIB.addOperand(MI->getOperand(i));
+ MI->eraseFromParent();
+ return BB;
+ }
+
+ case ARM::tMOVCCr_pseudo: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+
+ case ARM::BCCi64:
+ case ARM::BCCZi64: {
+ // If there is an unconditional branch to the other successor, remove it.
+ BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
+
+ // Compare both parts that make up the double comparison separately for
+ // equality.
+ bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+
+ unsigned LHS1 = MI->getOperand(1).getReg();
+ unsigned LHS2 = MI->getOperand(2).getReg();
+ if (RHSisZero) {
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS1).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS2).addImm(0)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ } else {
+ unsigned RHS1 = MI->getOperand(3).getReg();
+ unsigned RHS2 = MI->getOperand(4).getReg();
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS1).addReg(RHS1));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS2).addReg(RHS2)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ }
+
+ MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+ MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
+ if (MI->getOperand(0).getImm() == ARMCC::NE)
+ std::swap(destMBB, exitMBB);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ if (isThumb2)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
+ else
+ BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return BB;
+
+ case ARM::Int_eh_sjlj_setup_dispatch:
+ EmitSjLjDispatchBlock(MI, BB);
+ return BB;
+
+ case ARM::ABS:
+ case ARM::t2ABS: {
+ // To insert an ABS instruction, we have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // source vreg to test against 0, the destination vreg to set,
+ // the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ // It transforms
+ // V1 = ABS V0
+ // into
+ // V2 = MOVS V0
+ // BCC (branch to SinkBB if V0 >= 0)
+ // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
+ // SinkBB: V1 = PHI(V2, V3)
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator BBI = ++BB->getIterator();
+ MachineFunction *Fn = BB->getParent();
+ MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+ Fn->insert(BBI, RSBBB);
+ Fn->insert(BBI, SinkBB);
+
+ unsigned int ABSSrcReg = MI->getOperand(1).getReg();
+ unsigned int ABSDstReg = MI->getOperand(0).getReg();
+ bool ABSSrcKIll = MI->getOperand(1).isKill();
+ bool isThumb2 = Subtarget->isThumb2();
+ MachineRegisterInfo &MRI = Fn->getRegInfo();
+ // In Thumb mode S must not be specified if source register is the SP or
+ // PC and if destination register is the SP, so restrict register class
+ unsigned NewRsbDstReg =
+ MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ SinkBB->splice(SinkBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ SinkBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(RSBBB);
+ BB->addSuccessor(SinkBB);
+
+ // fall through to SinkMBB
+ RSBBB->addSuccessor(SinkBB);
+
+ // insert a cmp at the end of BB
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(ABSSrcReg).addImm(0));
+
+ // insert a bcc with opposite CC to ARMCC::MI at the end of BB
+ BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
+ .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
+
+ // insert rsbri in RSBBB
+ // Note: BCC and rsbri will be converted into predicated rsbmi
+ // by if-conversion pass
+ BuildMI(*RSBBB, RSBBB->begin(), dl,
+ TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
+ .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
+ .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+
+ // insert PHI in SinkBB,
+ // reuse ABSDstReg to not change uses of ABS instruction
+ BuildMI(*SinkBB, SinkBB->begin(), dl,
+ TII->get(ARM::PHI), ABSDstReg)
+ .addReg(NewRsbDstReg).addMBB(RSBBB)
+ .addReg(ABSSrcReg).addMBB(BB);
+
+ // remove ABS instruction
+ MI->eraseFromParent();
+
+ // return last added BB
+ return SinkBB;
+ }
+ case ARM::COPY_STRUCT_BYVAL_I32:
+ ++NumLoopByVals;
+ return EmitStructByval(MI, BB);
+ case ARM::WIN__CHKSTK:
+ return EmitLowered__chkstk(MI, BB);
+ case ARM::WIN__DBZCHK:
+ return EmitLowered__dbzchk(MI, BB);
+ }
+}
+
+/// \brief Attaches vregs to MEMCPY that it will use as scratch registers
+/// when it is expanded into LDM/STM. This is done as a post-isel lowering
+/// instead of as a custom inserter because we need the use list from the SDNode.
+static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
+ MachineInstr *MI, const SDNode *Node) {
+ bool isThumb1 = Subtarget->isThumb1Only();
+
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineInstrBuilder MIB(*MF, MI);
+
+ // If the new dst/src is unused mark it as dead.
+ if (!Node->hasAnyUseOfValue(0)) {
+ MI->getOperand(0).setIsDead(true);
+ }
+ if (!Node->hasAnyUseOfValue(1)) {
+ MI->getOperand(1).setIsDead(true);
+ }
+
+ // The MEMCPY both defines and kills the scratch registers.
+ for (unsigned I = 0; I != MI->getOperand(4).getImm(); ++I) {
+ unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass);
+ MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
+ }
+}
+
+void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ if (MI->getOpcode() == ARM::MEMCPY) {
+ attachMEMCPYScratchRegs(Subtarget, MI, Node);
+ return;
+ }
+
+ const MCInstrDesc *MCID = &MI->getDesc();
+ // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
+ // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
+ // operand is still set to noreg. If needed, set the optional operand's
+ // register to CPSR, and remove the redundant implicit def.
+ //
+ // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
+
+ // Rename pseudo opcodes.
+ unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
+ if (NewOpc) {
+ const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
+ MCID = &TII->get(NewOpc);
+
+ assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
+ "converted opcode should be the same except for cc_out");
+
+ MI->setDesc(*MCID);
+
+ // Add the optional cc_out operand
+ MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
+ }
+ unsigned ccOutIdx = MCID->getNumOperands() - 1;
+
+ // Any ARM instruction that sets the 's' bit should specify an optional
+ // "cc_out" operand in the last operand position.
+ if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
+ assert(!NewOpc && "Optional cc_out operand required");
+ return;
+ }
+ // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
+ // since we already have an optional CPSR def.
+ bool definesCPSR = false;
+ bool deadCPSR = false;
+ for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
+ definesCPSR = true;
+ if (MO.isDead())
+ deadCPSR = true;
+ MI->RemoveOperand(i);
+ break;
+ }
+ }
+ if (!definesCPSR) {
+ assert(!NewOpc && "Optional cc_out operand required");
+ return;
+ }
+ assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
+ if (deadCPSR) {
+ assert(!MI->getOperand(ccOutIdx).getReg() &&
+ "expect uninitialized optional cc_out operand");
+ return;
+ }
+
+ // If this instruction was defined with an optional CPSR def and its dag node
+ // had a live implicit CPSR def, then activate the optional CPSR def.
+ MachineOperand &MO = MI->getOperand(ccOutIdx);
+ MO.setReg(ARM::CPSR);
+ MO.setIsDef(true);
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+// Helper function that checks if N is a null or all ones constant.
+static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
+ return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
+}
+
+// Return true if N is conditionally 0 or all ones.
+// Detects these expressions where cc is an i1 value:
+//
+// (select cc 0, y) [AllOnes=0]
+// (select cc y, 0) [AllOnes=0]
+// (zext cc) [AllOnes=0]
+// (sext cc) [AllOnes=0/1]
+// (select cc -1, y) [AllOnes=1]
+// (select cc y, -1) [AllOnes=1]
+//
+// Invert is set when N is the null/all ones constant when CC is false.
+// OtherOp is set to the alternative value of N.
+static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
+ SDValue &CC, bool &Invert,
+ SDValue &OtherOp,
+ SelectionDAG &DAG) {
+ switch (N->getOpcode()) {
+ default: return false;
+ case ISD::SELECT: {
+ CC = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ if (isZeroOrAllOnes(N1, AllOnes)) {
+ Invert = false;
+ OtherOp = N2;
+ return true;
+ }
+ if (isZeroOrAllOnes(N2, AllOnes)) {
+ Invert = true;
+ OtherOp = N1;
+ return true;
+ }
+ return false;
+ }
+ case ISD::ZERO_EXTEND:
+ // (zext cc) can never be the all ones value.
+ if (AllOnes)
+ return false;
+ // Fall through.
+ case ISD::SIGN_EXTEND: {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ CC = N->getOperand(0);
+ if (CC.getValueType() != MVT::i1)
+ return false;
+ Invert = !AllOnes;
+ if (AllOnes)
+ // When looking for an AllOnes constant, N is an sext, and the 'other'
+ // value is 0.
+ OtherOp = DAG.getConstant(0, dl, VT);
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ // When looking for a 0 constant, N can be zext or sext.
+ OtherOp = DAG.getConstant(1, dl, VT);
+ else
+ OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
+ VT);
+ return true;
+ }
+ }
+}
+
+// Combine a constant select operand into its use:
+//
+// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
+// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
+// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
+// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
+//
+// The transform is rejected if the select doesn't have a constant operand that
+// is null, or all ones when AllOnes is set.
+//
+// Also recognize sext/zext from i1:
+//
+// (add (zext cc), x) -> (select cc (add x, 1), x)
+// (add (sext cc), x) -> (select cc (add x, -1), x)
+//
+// These transformations eventually create predicated instructions.
+//
+// @param N The node to transform.
+// @param Slct The N operand that is a select.
+// @param OtherOp The other N operand (x above).
+// @param DCI Context.
+// @param AllOnes Require the select constant to be all ones instead of null.
+// @returns The new node, or SDValue() on failure.
+static
+SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+ TargetLowering::DAGCombinerInfo &DCI,
+ bool AllOnes = false) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ SDValue NonConstantVal;
+ SDValue CCOp;
+ bool SwapSelectOps;
+ if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
+ NonConstantVal, DAG))
+ return SDValue();
+
+ // Slct is now know to be the desired identity constant when CC is true.
+ SDValue TrueVal = OtherOp;
+ SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+ OtherOp, NonConstantVal);
+ // Unless SwapSelectOps says CC should be false.
+ if (SwapSelectOps)
+ std::swap(TrueVal, FalseVal);
+
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
+ CCOp, TrueVal, FalseVal);
+}
+
+// Attempt combineSelectAndUse on each operand of a commutative operator N.
+static
+SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
+ if (Result.getNode())
+ return Result;
+ }
+ if (N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
+ if (Result.getNode())
+ return Result;
+ }
+ return SDValue();
+}
+
+// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
+// (only after legalization).
+static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ // Only perform optimization if after legalize, and if NEON is available. We
+ // also expected both operands to be BUILD_VECTORs.
+ if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
+ || N0.getOpcode() != ISD::BUILD_VECTOR
+ || N1.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Check output type since VPADDL operand elements can only be 8, 16, or 32.
+ EVT VT = N->getValueType(0);
+ if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
+ return SDValue();
+
+ // Check that the vector operands are of the right form.
+ // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
+ // operands, where N is the size of the formed vector.
+ // Each EXTRACT_VECTOR should have the same input vector and odd or even
+ // index such that we have a pair wise add pattern.
+
+ // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
+ if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ SDValue Vec = N0->getOperand(0)->getOperand(0);
+ SDNode *V = Vec.getNode();
+ unsigned nextIndex = 0;
+
+ // For each operands to the ADD which are BUILD_VECTORs,
+ // check to see if each of their operands are an EXTRACT_VECTOR with
+ // the same vector and appropriate index.
+ for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
+ if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
+ && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ SDValue ExtVec0 = N0->getOperand(i);
+ SDValue ExtVec1 = N1->getOperand(i);
+
+ // First operand is the vector, verify its the same.
+ if (V != ExtVec0->getOperand(0).getNode() ||
+ V != ExtVec1->getOperand(0).getNode())
+ return SDValue();
+
+ // Second is the constant, verify its correct.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
+
+ // For the constant, we want to see all the even or all the odd.
+ if (!C0 || !C1 || C0->getZExtValue() != nextIndex
+ || C1->getZExtValue() != nextIndex+1)
+ return SDValue();
+
+ // Increment index.
+ nextIndex+=2;
+ } else
+ return SDValue();
+ }
+
+ // Create VPADDL node.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ SDLoc dl(N);
+
+ // Build operand list.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+
+ // Input is the vector.
+ Ops.push_back(Vec);
+
+ // Get widened type and narrowed type.
+ MVT widenType;
+ unsigned numElem = VT.getVectorNumElements();
+
+ EVT inputLaneType = Vec.getValueType().getVectorElementType();
+ switch (inputLaneType.getSimpleVT().SimpleTy) {
+ case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
+ case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
+ case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
+ default:
+ llvm_unreachable("Invalid vector element type for padd optimization.");
+ }
+
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
+ unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
+ return DAG.getNode(ExtOp, dl, VT, tmp);
+}
+
+static SDValue findMUL_LOHI(SDValue V) {
+ if (V->getOpcode() == ISD::UMUL_LOHI ||
+ V->getOpcode() == ISD::SMUL_LOHI)
+ return V;
+ return SDValue();
+}
+
+static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ if (Subtarget->isThumb1Only()) return SDValue();
+
+ // Only perform the checks after legalize when the pattern is available.
+ if (DCI.isBeforeLegalize()) return SDValue();
+
+ // Look for multiply add opportunities.
+ // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
+ // each add nodes consumes a value from ISD::UMUL_LOHI and there is
+ // a glue link from the first add to the second add.
+ // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
+ // a S/UMLAL instruction.
+ // UMUL_LOHI
+ // / :lo \ :hi
+ // / \ [no multiline comment]
+ // loAdd -> ADDE |
+ // \ :glue /
+ // \ /
+ // ADDC <- hiAdd
+ //
+ assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
+ SDValue AddcOp0 = AddcNode->getOperand(0);
+ SDValue AddcOp1 = AddcNode->getOperand(1);
+
+ // Check if the two operands are from the same mul_lohi node.
+ if (AddcOp0.getNode() == AddcOp1.getNode())
+ return SDValue();
+
+ assert(AddcNode->getNumValues() == 2 &&
+ AddcNode->getValueType(0) == MVT::i32 &&
+ "Expect ADDC with two result values. First: i32");
+
+ // Check that we have a glued ADDC node.
+ if (AddcNode->getValueType(1) != MVT::Glue)
+ return SDValue();
+
+ // Check that the ADDC adds the low result of the S/UMUL_LOHI.
+ if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
+ AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
+ AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
+ AddcOp1->getOpcode() != ISD::SMUL_LOHI)
+ return SDValue();
+
+ // Look for the glued ADDE.
+ SDNode* AddeNode = AddcNode->getGluedUser();
+ if (!AddeNode)
+ return SDValue();
+
+ // Make sure it is really an ADDE.
+ if (AddeNode->getOpcode() != ISD::ADDE)
+ return SDValue();
+
+ assert(AddeNode->getNumOperands() == 3 &&
+ AddeNode->getOperand(2).getValueType() == MVT::Glue &&
+ "ADDE node has the wrong inputs");
+
+ // Check for the triangle shape.
+ SDValue AddeOp0 = AddeNode->getOperand(0);
+ SDValue AddeOp1 = AddeNode->getOperand(1);
+
+ // Make sure that the ADDE operands are not coming from the same node.
+ if (AddeOp0.getNode() == AddeOp1.getNode())
+ return SDValue();
+
+ // Find the MUL_LOHI node walking up ADDE's operands.
+ bool IsLeftOperandMUL = false;
+ SDValue MULOp = findMUL_LOHI(AddeOp0);
+ if (MULOp == SDValue())
+ MULOp = findMUL_LOHI(AddeOp1);
+ else
+ IsLeftOperandMUL = true;
+ if (MULOp == SDValue())
+ return SDValue();
+
+ // Figure out the right opcode.
+ unsigned Opc = MULOp->getOpcode();
+ unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
+
+ // Figure out the high and low input values to the MLAL node.
+ SDValue* HiAdd = nullptr;
+ SDValue* LoMul = nullptr;
+ SDValue* LowAdd = nullptr;
+
+ // Ensure that ADDE is from high result of ISD::SMUL_LOHI.
+ if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
+ return SDValue();
+
+ if (IsLeftOperandMUL)
+ HiAdd = &AddeOp1;
+ else
+ HiAdd = &AddeOp0;
+
+
+ // Ensure that LoMul and LowAdd are taken from correct ISD::SMUL_LOHI node
+ // whose low result is fed to the ADDC we are checking.
+
+ if (AddcOp0 == MULOp.getValue(0)) {
+ LoMul = &AddcOp0;
+ LowAdd = &AddcOp1;
+ }
+ if (AddcOp1 == MULOp.getValue(0)) {
+ LoMul = &AddcOp1;
+ LowAdd = &AddcOp0;
+ }
+
+ if (!LoMul)
+ return SDValue();
+
+ // Create the merged node.
+ SelectionDAG &DAG = DCI.DAG;
+
+ // Build operand list.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(LoMul->getOperand(0));
+ Ops.push_back(LoMul->getOperand(1));
+ Ops.push_back(*LowAdd);
+ Ops.push_back(*HiAdd);
+
+ SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
+ DAG.getVTList(MVT::i32, MVT::i32), Ops);
+
+ // Replace the ADDs' nodes uses by the MLA node's values.
+ SDValue HiMLALResult(MLALNode.getNode(), 1);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
+
+ SDValue LoMLALResult(MLALNode.getNode(), 0);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
+
+ // Return original node to notify the driver to stop replacing.
+ SDValue resNode(AddcNode, 0);
+ return resNode;
+}
+
+/// PerformADDCCombine - Target-specific dag combine transform from
+/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
+static SDValue PerformADDCCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ return AddCombineTo64bitMLAL(N, DCI, Subtarget);
+
+}
+
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1. This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget){
+
+ // Attempt to create vpaddl for this add.
+ SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
+ // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
+ if (N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
+ if (Result.getNode()) return Result;
+ }
+ return SDValue();
+}
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // First try with the default operand order.
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
+ // If that didn't work, try again with the operands commuted.
+ return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
+}
+
+/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+///
+static SDValue PerformSUBCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+ if (N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformVMULCombine
+/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
+/// special multiplier accumulator forwarding.
+/// vmul d3, d0, d2
+/// vmla d3, d1, d2
+/// is faster than
+/// vadd d3, d0, d1
+/// vmul d3, d3, d2
+// However, for (A + B) * (A + B),
+// vadd d2, d0, d1
+// vmul d3, d0, d2
+// vmla d3, d1, d2
+// is slower than
+// vadd d2, d0, d1
+// vmul d3, d2, d2
+static SDValue PerformVMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasVMLxForwarding())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned Opcode = N0.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB) {
+ Opcode = N1.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB)
+ return SDValue();
+ std::swap(N0, N1);
+ }
+
+ if (N0 == N1)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ return DAG.getNode(Opcode, DL, VT,
+ DAG.getNode(ISD::MUL, DL, VT, N00, N1),
+ DAG.getNode(ISD::MUL, DL, VT, N01, N1));
+}
+
+static SDValue PerformMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+
+ if (Subtarget->isThumb1Only())
+ return SDValue();
+
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT.is64BitVector() || VT.is128BitVector())
+ return PerformVMULCombine(N, DCI, Subtarget);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ int64_t MulAmt = C->getSExtValue();
+ unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
+
+ ShiftAmt = ShiftAmt & (32 - 1);
+ SDValue V = N->getOperand(0);
+ SDLoc DL(N);
+
+ SDValue Res;
+ MulAmt >>= ShiftAmt;
+
+ if (MulAmt >= 0) {
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt - 1), DL,
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt + 1), DL,
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+ } else {
+ uint64_t MulAmtAbs = -MulAmt;
+ if (isPowerOf2_32(MulAmtAbs + 1)) {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmtAbs - 1)) {
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
+ MVT::i32)));
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, MVT::i32), Res);
+
+ } else
+ return SDValue();
+ }
+
+ if (ShiftAmt != 0)
+ Res = DAG.getNode(ISD::SHL, DL, VT,
+ Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ // Attempt to use immediate-form VBIC
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VbicVT;
+ SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, dl, VbicVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
+ SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
+ }
+ }
+ }
+
+ if (!Subtarget->isThumb1Only()) {
+ // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
+ if (Result.getNode())
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Attempt to use immediate-form VORR
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN && Subtarget->hasNEON() &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VorrVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, dl, VorrVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
+ SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
+ }
+ }
+ }
+
+ if (!Subtarget->isThumb1Only()) {
+ // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // The code below optimizes (or (and X, Y), Z).
+ // The AND operand needs to have a single user to make these optimizations
+ // profitable.
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
+ return SDValue();
+ SDValue N1 = N->getOperand(1);
+
+ // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+ if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ APInt SplatBits0, SplatBits1;
+ BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+ // Ensure that the second operand of both ands are constants
+ if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ // Ensure that the bit width of the constants are the same and that
+ // the splat arguments are logical inverses as per the pattern we
+ // are trying to simplify.
+ if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
+ SplatBits0 == ~SplatBits1) {
+ // Canonicalize the vector type to make instruction selection
+ // simpler.
+ EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+ N0->getOperand(1),
+ N0->getOperand(0),
+ N1->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
+ }
+ }
+
+ // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+ // reasonable.
+
+ // BFI is only available on V6T2+
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+ return SDValue();
+
+ SDLoc DL(N);
+ // 1) or (and A, mask), val => ARMbfi A, val, mask
+ // iff (val & mask) == val
+ //
+ // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
+ // && mask == ~mask2
+ // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
+ // && ~mask == mask2
+ // (i.e., copy a bitfield value into another bitfield of the same width)
+
+ if (VT != MVT::i32)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+
+ // The value and the mask need to be constants so we can verify this is
+ // actually a bitfield set. If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ SDValue MaskOp = N0.getOperand(1);
+ ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
+ if (!MaskC)
+ return SDValue();
+ unsigned Mask = MaskC->getZExtValue();
+ if (Mask == 0xffff)
+ return SDValue();
+ SDValue Res;
+ // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N1C) {
+ unsigned Val = N1C->getZExtValue();
+ if ((Val & ~Mask) != Val)
+ return SDValue();
+
+ if (ARM::isBitFieldInvertedMask(Mask)) {
+ Val >>= countTrailingZeros(~Mask);
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
+ DAG.getConstant(Val, DL, MVT::i32),
+ DAG.getConstant(Mask, DL, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
+ } else if (N1.getOpcode() == ISD::AND) {
+ // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned Mask2 = N11C->getZExtValue();
+
+ // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
+ // as is to match.
+ if (ARM::isBitFieldInvertedMask(Mask) &&
+ (Mask == ~Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask == 0xffff || Mask == 0xffff0000))
+ return SDValue();
+ // 2a
+ unsigned amt = countTrailingZeros(Mask2);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
+ DAG.getConstant(amt, DL, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
+ DAG.getConstant(Mask, DL, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ } else if (ARM::isBitFieldInvertedMask(~Mask) &&
+ (~Mask == Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask2 == 0xffff || Mask2 == 0xffff0000))
+ return SDValue();
+ // 2b
+ unsigned lsb = countTrailingZeros(Mask);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N00,
+ DAG.getConstant(lsb, DL, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
+ DAG.getConstant(Mask2, DL, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
+ }
+
+ if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
+ N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
+ ARM::isBitFieldInvertedMask(~Mask)) {
+ // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
+ // where lsb(mask) == #shamt and masked bits of B are known zero.
+ SDValue ShAmt = N00.getOperand(1);
+ unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ unsigned LSB = countTrailingZeros(Mask);
+ if (ShAmtC != LSB)
+ return SDValue();
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
+ DAG.getConstant(~Mask, DL, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ }
+
+ return SDValue();
+}
+
+static SDValue PerformXORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ if (!Subtarget->isThumb1Only()) {
+ // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
+ if (Result.getNode())
+ return Result;
+ }
+
+ return SDValue();
+}
+
+// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
+// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
+// their position in "to" (Rd).
+static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
+ assert(N->getOpcode() == ARMISD::BFI);
+
+ SDValue From = N->getOperand(1);
+ ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
+ FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
+
+ // If the Base came from a SHR #C, we can deduce that it is really testing bit
+ // #C in the base of the SHR.
+ if (From->getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(From->getOperand(1))) {
+ APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
+ assert(Shift.getLimitedValue() < 32 && "Shift too large!");
+ FromMask <<= Shift.getLimitedValue(31);
+ From = From->getOperand(0);
+ }
+
+ return From;
+}
+
+// If A and B contain one contiguous set of bits, does A | B == A . B?
+//
+// Neither A nor B must be zero.
+static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
+ unsigned LastActiveBitInA = A.countTrailingZeros();
+ unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
+ return LastActiveBitInA - 1 == FirstActiveBitInB;
+}
+
+static SDValue FindBFIToCombineWith(SDNode *N) {
+ // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with,
+ // if one exists.
+ APInt ToMask, FromMask;
+ SDValue From = ParseBFI(N, ToMask, FromMask);
+ SDValue To = N->getOperand(0);
+
+ // Now check for a compatible BFI to merge with. We can pass through BFIs that
+ // aren't compatible, but not if they set the same bit in their destination as
+ // we do (or that of any BFI we're going to combine with).
+ SDValue V = To;
+ APInt CombinedToMask = ToMask;
+ while (V.getOpcode() == ARMISD::BFI) {
+ APInt NewToMask, NewFromMask;
+ SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
+ if (NewFrom != From) {
+ // This BFI has a different base. Keep going.
+ CombinedToMask |= NewToMask;
+ V = V.getOperand(0);
+ continue;
+ }
+
+ // Do the written bits conflict with any we've seen so far?
+ if ((NewToMask & CombinedToMask).getBoolValue())
+ // Conflicting bits - bail out because going further is unsafe.
+ return SDValue();
+
+ // Are the new bits contiguous when combined with the old bits?
+ if (BitsProperlyConcatenate(ToMask, NewToMask) &&
+ BitsProperlyConcatenate(FromMask, NewFromMask))
+ return V;
+ if (BitsProperlyConcatenate(NewToMask, ToMask) &&
+ BitsProperlyConcatenate(NewFromMask, FromMask))
+ return V;
+
+ // We've seen a write to some bits, so track it.
+ CombinedToMask |= NewToMask;
+ // Keep going...
+ V = V.getOperand(0);
+ }
+
+ return SDValue();
+}
+
+static SDValue PerformBFICombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::AND) {
+ // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+ // the bits being cleared by the AND are not demanded by the BFI.
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned LSB = countTrailingZeros(~InvMask);
+ unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
+ assert(Width <
+ static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
+ "undefined behavior");
+ unsigned Mask = (1u << Width) - 1;
+ unsigned Mask2 = N11C->getZExtValue();
+ if ((Mask & (~Mask2)) == 0)
+ return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), N1.getOperand(0),
+ N->getOperand(2));
+ } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
+ // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
+ // Keep track of any consecutive bits set that all come from the same base
+ // value. We can combine these together into a single BFI.
+ SDValue CombineBFI = FindBFIToCombineWith(N);
+ if (CombineBFI == SDValue())
+ return SDValue();
+
+ // We've found a BFI.
+ APInt ToMask1, FromMask1;
+ SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
+
+ APInt ToMask2, FromMask2;
+ SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
+ assert(From1 == From2);
+ (void)From2;
+
+ // First, unlink CombineBFI.
+ DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
+ // Then create a new BFI, combining the two together.
+ APInt NewFromMask = FromMask1 | FromMask2;
+ APInt NewToMask = ToMask1 | ToMask2;
+
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ if (NewFromMask[0] == 0)
+ From1 = DCI.DAG.getNode(
+ ISD::SRL, dl, VT, From1,
+ DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
+ return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
+ DCI.DAG.getConstant(~NewToMask, dl, VT));
+ }
+ return SDValue();
+}
+
+/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVRRD.
+static SDValue PerformVMOVRRDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // vmovrrd(vmovdrr x, y) -> x,y
+ SDValue InDouble = N->getOperand(0);
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
+ return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+
+ // vmovrrd(load f64) -> (load i32), (load i32)
+ SDNode *InNode = InDouble.getNode();
+ if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
+ InNode->getValueType(0) == MVT::f64 &&
+ InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
+ !cast<LoadSDNode>(InNode)->isVolatile()) {
+ // TODO: Should this be done for non-FrameIndex operands?
+ LoadSDNode *LD = cast<LoadSDNode>(InNode);
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(LD);
+ SDValue BasePtr = LD->getBasePtr();
+ SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, DL, MVT::i32));
+ SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->isInvariant(),
+ std::min(4U, LD->getAlignment() / 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+ if (DCI.DAG.getDataLayout().isBigEndian())
+ std::swap (NewLD1, NewLD2);
+ SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
+static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
+ // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::BITCAST)
+ Op1 = Op1.getOperand(0);
+ if (Op0.getOpcode() == ARMISD::VMOVRRD &&
+ Op0.getNode() == Op1.getNode() &&
+ Op0.getResNo() == 0 && Op1.getResNo() == 1)
+ return DAG.getNode(ISD::BITCAST, SDLoc(N),
+ N->getValueType(0), Op0.getOperand(0));
+ return SDValue();
+}
+
+/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
+/// are normal, non-volatile loads. If so, it is profitable to bitcast an
+/// i64 vector to have f64 elements, since the value can then be loaded
+/// directly into a VFP register.
+static bool hasNormalLoadOperand(SDNode *N) {
+ unsigned NumElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
+ return true;
+ }
+ return false;
+}
+
+/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
+/// ISD::BUILD_VECTOR.
+static SDValue PerformBUILD_VECTORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
+ // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
+ // into a pair of GPRs, which is fine when the value is used as a scalar,
+ // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
+ SelectionDAG &DAG = DCI.DAG;
+ if (N->getNumOperands() == 2) {
+ SDValue RV = PerformVMOVDRRCombine(N, DAG);
+ if (RV.getNode())
+ return RV;
+ }
+
+ // Load i64 elements as f64 values so that type legalization does not split
+ // them up into i32 values.
+ EVT VT = N->getValueType(0);
+ if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
+ return SDValue();
+ SDLoc dl(N);
+ SmallVector<SDValue, 8> Ops;
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
+ Ops.push_back(V);
+ // Make the DAGCombiner fold the bitcast.
+ DCI.AddToWorklist(V.getNode());
+ }
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+/// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
+static SDValue
+PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
+ // At that time, we may have inserted bitcasts from integer to float.
+ // If these bitcasts have survived DAGCombine, change the lowering of this
+ // BUILD_VECTOR in something more vector friendly, i.e., that does not
+ // force to use floating point types.
+
+ // Make sure we can change the type of the vector.
+ // This is possible iff:
+ // 1. The vector is only used in a bitcast to a integer type. I.e.,
+ // 1.1. Vector is used only once.
+ // 1.2. Use is a bit convert to an integer type.
+ // 2. The size of its operands are 32-bits (64-bits are not legal).
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+
+ // Check 1.1. and 2.
+ if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
+ return SDValue();
+
+ // By construction, the input type must be float.
+ assert(EltVT == MVT::f32 && "Unexpected type!");
+
+ // Check 1.2.
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() != ISD::BITCAST ||
+ Use->getValueType(0).isFloatingPoint())
+ return SDValue();
+
+ // Check profitability.
+ // Model is, if more than half of the relevant operands are bitcast from
+ // i32, turn the build_vector into a sequence of insert_vector_elt.
+ // Relevant operands are everything that is not statically
+ // (i.e., at compile time) bitcasted.
+ unsigned NumOfBitCastedElts = 0;
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumOfRelevantElts = NumElts;
+ for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
+ SDValue Elt = N->getOperand(Idx);
+ if (Elt->getOpcode() == ISD::BITCAST) {
+ // Assume only bit cast to i32 will go away.
+ if (Elt->getOperand(0).getValueType() == MVT::i32)
+ ++NumOfBitCastedElts;
+ } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt))
+ // Constants are statically casted, thus do not count them as
+ // relevant operands.
+ --NumOfRelevantElts;
+ }
+
+ // Check if more than half of the elements require a non-free bitcast.
+ if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ // Create the new vector type.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ // Check if the type is legal.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(VecVT))
+ return SDValue();
+
+ // Combine:
+ // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
+ // => BITCAST INSERT_VECTOR_ELT
+ // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
+ // (BITCAST EN), N.
+ SDValue Vec = DAG.getUNDEF(VecVT);
+ SDLoc dl(N);
+ for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
+ SDValue V = N->getOperand(Idx);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (V.getOpcode() == ISD::BITCAST &&
+ V->getOperand(0).getValueType() == MVT::i32)
+ // Fold obvious case.
+ V = V.getOperand(0);
+ else {
+ V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(V.getNode());
+ }
+ SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
+ }
+ Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ return Vec;
+}
+
+/// PerformInsertEltCombine - Target-specific dag combine xforms for
+/// ISD::INSERT_VECTOR_ELT.
+static SDValue PerformInsertEltCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Bitcast an i64 load inserted into a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ EVT VT = N->getValueType(0);
+ SDNode *Elt = N->getOperand(1).getNode();
+ if (VT.getVectorElementType() != MVT::i64 ||
+ !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ VT.getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(V.getNode());
+ SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
+ Vec, V, N->getOperand(2));
+ return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
+}
+
+/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
+/// ISD::VECTOR_SHUFFLE.
+static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
+ // The LLVM shufflevector instruction does not require the shuffle mask
+ // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
+ // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
+ // operands do not match the mask length, they are extended by concatenating
+ // them with undef vectors. That is probably the right thing for other
+ // targets, but for NEON it is better to concatenate two double-register
+ // size vector operands into a single quad-register size vector. Do that
+ // transformation here:
+ // shuffle(concat(v1, undef), concat(v2, undef)) ->
+ // shuffle(concat(v1, v2), undef)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op1.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op0.getNumOperands() != 2 ||
+ Op1.getNumOperands() != 2)
+ return SDValue();
+ SDValue Concat0Op1 = Op0.getOperand(1);
+ SDValue Concat1Op1 = Op1.getOperand(1);
+ if (Concat0Op1.getOpcode() != ISD::UNDEF ||
+ Concat1Op1.getOpcode() != ISD::UNDEF)
+ return SDValue();
+ // Skip the transformation if any of the types are illegal.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = N->getValueType(0);
+ if (!TLI.isTypeLegal(VT) ||
+ !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
+ !TLI.isTypeLegal(Concat1Op1.getValueType()))
+ return SDValue();
+
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+ Op0.getOperand(0), Op1.getOperand(0));
+ // Translate the shuffle mask.
+ SmallVector<int, 16> NewMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfElts = NumElts/2;
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned n = 0; n < NumElts; ++n) {
+ int MaskElt = SVN->getMaskElt(n);
+ int NewElt = -1;
+ if (MaskElt < (int)HalfElts)
+ NewElt = MaskElt;
+ else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
+ NewElt = HalfElts + MaskElt - NumElts;
+ NewMask.push_back(NewElt);
+ }
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
+ DAG.getUNDEF(VT), NewMask.data());
+}
+
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
+/// NEON load/store intrinsics, and generic vector load/stores, to merge
+/// base address updates.
+/// For generic load/stores, the memory type is assumed to be a vector.
+/// The caller is assumed to have checked legality.
+static SDValue CombineBaseUpdate(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ const bool isStore = N->getOpcode() == ISD::STORE;
+ const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
+ SDValue Addr = N->getOperand(AddrOpIdx);
+ MemSDNode *MemN = cast<MemSDNode>(N);
+ SDLoc dl(N);
+
+ // Search for a use of the address operand that is an increment.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::ADD ||
+ UI.getUse().getResNo() != Addr.getResNo())
+ continue;
+
+ // Check that the add is independent of the load/store. Otherwise, folding
+ // it would create a cycle.
+ if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ continue;
+
+ // Find the new opcode for the updating load/store.
+ bool isLoadOp = true;
+ bool isLaneOp = false;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ if (isIntrinsic) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: llvm_unreachable("unexpected intrinsic for Neon base update");
+ case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; break;
+ case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
+ NumVecs = 2; break;
+ case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
+ NumVecs = 3; break;
+ case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
+ NumVecs = 4; break;
+ case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
+ NumVecs = 2; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
+ NumVecs = 3; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
+ NumVecs = 4; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLoadOp = false; break;
+ case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
+ NumVecs = 2; isLoadOp = false; break;
+ case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
+ NumVecs = 3; isLoadOp = false; break;
+ case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
+ NumVecs = 4; isLoadOp = false; break;
+ case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
+ NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
+ NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
+ NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
+ }
+ } else {
+ isLaneOp = true;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode for Neon base update");
+ case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
+ case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
+ case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+ case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; isLaneOp = false; break;
+ case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
+ }
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoadOp) {
+ VecTy = N->getValueType(0);
+ } else if (isIntrinsic) {
+ VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+ } else {
+ assert(isStore && "Node has to be a load, a store, or an intrinsic!");
+ VecTy = N->getOperand(1).getValueType();
+ }
+
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (isLaneOp)
+ NumBytes /= VecTy.getVectorNumElements();
+
+ // If the increment is a constant, it must match the memory ref size.
+ SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+ if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+ uint64_t IncVal = CInc->getZExtValue();
+ if (IncVal != NumBytes)
+ continue;
+ } else if (NumBytes >= 3 * 16) {
+ // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
+ // separate instructions that make it harder to use a non-constant update.
+ continue;
+ }
+
+ // OK, we found an ADD we can fold into the base update.
+ // Now, create a _UPD node, taking care of not breaking alignment.
+
+ EVT AlignedVecTy = VecTy;
+ unsigned Alignment = MemN->getAlignment();
+
+ // If this is a less-than-standard-aligned load/store, change the type to
+ // match the standard alignment.
+ // The alignment is overlooked when selecting _UPD variants; and it's
+ // easier to introduce bitcasts here than fix that.
+ // There are 3 ways to get to this base-update combine:
+ // - intrinsics: they are assumed to be properly aligned (to the standard
+ // alignment of the memory type), so we don't need to do anything.
+ // - ARMISD::VLDx nodes: they are only generated from the aforementioned
+ // intrinsics, so, likewise, there's nothing to do.
+ // - generic load/store instructions: the alignment is specified as an
+ // explicit operand, rather than implicitly as the standard alignment
+ // of the memory type (like the intrisics). We need to change the
+ // memory type to match the explicit alignment. That way, we don't
+ // generate non-standard-aligned ARMISD::VLDx nodes.
+ if (isa<LSBaseSDNode>(N)) {
+ if (Alignment == 0)
+ Alignment = 1;
+ if (Alignment < VecTy.getScalarSizeInBits() / 8) {
+ MVT EltTy = MVT::getIntegerVT(Alignment * 8);
+ assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
+ assert(!isLaneOp && "Unexpected generic load/store lane.");
+ unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
+ AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
+ }
+ // Don't set an explicit alignment on regular load/stores that we want
+ // to transform to VLD/VST 1_UPD nodes.
+ // This matches the behavior of regular load/stores, which only get an
+ // explicit alignment if the MMO alignment is larger than the standard
+ // alignment of the memory type.
+ // Intrinsics, however, always get an explicit alignment, set to the
+ // alignment of the MMO.
+ Alignment = 1;
+ }
+
+ // Create the new updating load/store node.
+ // First, create an SDVTList for the new updating node's results.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = AlignedVecTy;
+ Tys[n++] = MVT::i32;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
+
+ // Then, gather the new node's operands.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(AddrOpIdx));
+ Ops.push_back(Inc);
+
+ if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
+ // Try to match the intrinsic's signature
+ Ops.push_back(StN->getValue());
+ } else {
+ // Loads (and of course intrinsics) match the intrinsics' signature,
+ // so just add all but the alignment operand.
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
+ Ops.push_back(N->getOperand(i));
+ }
+
+ // For all node types, the alignment operand is always the last one.
+ Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
+
+ // If this is a non-standard-aligned STORE, the penultimate operand is the
+ // stored value. Bitcast it to the aligned type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
+ SDValue &StVal = Ops[Ops.size()-2];
+ StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
+ }
+
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys,
+ Ops, AlignedVecTy,
+ MemN->getMemOperand());
+
+ // Update the uses.
+ SmallVector<SDValue, 5> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i)
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+
+ // If this is an non-standard-aligned LOAD, the first result is the loaded
+ // value. Bitcast it to the expected result type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
+ SDValue &LdVal = NewResults[0];
+ LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
+ }
+
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+ break;
+ }
+ return SDValue();
+}
+
+static SDValue PerformVLDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ return CombineBaseUpdate(N, DCI);
+}
+
+/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
+/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
+/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
+/// return true.
+static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ // vldN-dup instructions only support 64-bit vectors for N > 1.
+ if (!VT.is64BitVector())
+ return false;
+
+ // Check if the VDUPLANE operand is a vldN-dup intrinsic.
+ SDNode *VLD = N->getOperand(0).getNode();
+ if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return false;
+ unsigned NumVecs = 0;
+ unsigned NewOpc = 0;
+ unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::arm_neon_vld2lane) {
+ NumVecs = 2;
+ NewOpc = ARMISD::VLD2DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+ NumVecs = 3;
+ NewOpc = ARMISD::VLD3DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+ NumVecs = 4;
+ NewOpc = ARMISD::VLD4DUP;
+ } else {
+ return false;
+ }
+
+ // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+ // numbers match the load.
+ unsigned VLDLaneNo =
+ cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ // Ignore uses of the chain result.
+ if (UI.getUse().getResNo() == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ if (User->getOpcode() != ARMISD::VDUPLANE ||
+ VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+ return false;
+ }
+
+ // Create the vldN-dup node.
+ EVT Tys[5];
+ unsigned n;
+ for (n = 0; n < NumVecs; ++n)
+ Tys[n] = VT;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
+ SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+ MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+ SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
+ Ops, VLDMemInt->getMemoryVT(),
+ VLDMemInt->getMemOperand());
+
+ // Update the uses.
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ unsigned ResNo = UI.getUse().getResNo();
+ // Ignore uses of the chain result.
+ if (ResNo == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+ }
+
+ // Now the vldN-lane intrinsic is dead except for its chain result.
+ // Update uses of the chain.
+ std::vector<SDValue> VLDDupResults;
+ for (unsigned n = 0; n < NumVecs; ++n)
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+ DCI.CombineTo(VLD, VLDDupResults);
+
+ return true;
+}
+
+/// PerformVDUPLANECombine - Target-specific dag combine xforms for
+/// ARMISD::VDUPLANE.
+static SDValue PerformVDUPLANECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Op = N->getOperand(0);
+
+ // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
+ // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
+ if (CombineVLDDUP(N, DCI))
+ return SDValue(N, 0);
+
+ // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+ // redundant. Ignore bit_converts for now; element sizes are checked below.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
+ return SDValue();
+
+ // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
+ unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+ // The canonical VMOV for a zero vector uses a 32-bit element size.
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned EltBits;
+ if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+ EltSize = 8;
+ EVT VT = N->getValueType(0);
+ if (EltSize > VT.getVectorElementType().getSizeInBits())
+ return SDValue();
+
+ return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+}
+
+static SDValue PerformLOADCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+
+ // If this is a legal vector load, try to combine it into a VLD1_UPD.
+ if (ISD::isNormalLoad(N) && VT.isVector() &&
+ DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return CombineBaseUpdate(N, DCI);
+
+ return SDValue();
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ if (St->isVolatile())
+ return SDValue();
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store. First,
+ // pack all of the elements in one place. Next, store to memory in fewer
+ // chunks.
+ SDValue StVal = St->getValue();
+ EVT VT = StVal.getValueType();
+ if (St->isTruncatingStore() && VT.isVector()) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT StVT = St->getMemoryVT();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
+
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
+
+ unsigned SizeRatio = FromEltSz / ToEltSz;
+ assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle.
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
+ NumElems*SizeRatio);
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SDLoc DL(St);
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; ++i)
+ ShuffleVec[i] = DAG.getDataLayout().isBigEndian()
+ ? (i + 1) * SizeRatio - 1
+ : i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (MVT Tp : MVT::integer_valuetypes()) {
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
+ StoreType = Tp;
+ }
+ // Didn't find a legal store type.
+ if (!TLI.isTypeLegal(StoreType))
+ return SDValue();
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
+ TLI.getPointerTy(DAG.getDataLayout()));
+ SDValue BasePtr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
+ for (unsigned I = 0; I < E; I++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(I, DL));
+ SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ Increment);
+ Chains.push_back(Ch);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ }
+
+ if (!ISD::isNormalStore(St))
+ return SDValue();
+
+ // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
+ // ARM stores of arguments in the same cache line.
+ if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
+ StVal.getNode()->hasOneUse()) {
+ SelectionDAG &DAG = DCI.DAG;
+ bool isBigEndian = DAG.getDataLayout().isBigEndian();
+ SDLoc DL(St);
+ SDValue BasePtr = St->getBasePtr();
+ SDValue NewST1 = DAG.getStore(St->getChain(), DL,
+ StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
+ BasePtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, DL, MVT::i32));
+ return DAG.getStore(NewST1.getValue(0), DL,
+ StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
+ OffsetPtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(),
+ std::min(4U, St->getAlignment() / 2));
+ }
+
+ if (StVal.getValueType() == MVT::i64 &&
+ StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(StVal);
+ SDValue IntVec = StVal.getOperand(0);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ IntVec.getValueType().getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+ SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ Vec, StVal.getOperand(1));
+ dl = SDLoc(N);
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(ExtElt.getNode());
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment(),
+ St->getAAInfo());
+ }
+
+ // If this is a legal vector store, try to combine it into a VST1_UPD.
+ if (ISD::isNormalStore(N) && VT.isVector() &&
+ DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return CombineBaseUpdate(N, DCI);
+
+ return SDValue();
+}
+
+/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
+/// can replace combinations of VMUL and VCVT (floating-point to integer)
+/// when the VMUL has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vmul.f32 d16, d17, d16
+/// vcvt.s32.f32 d16, d16
+/// becomes:
+/// vcvt.s32.f32 d16, d16, #3
+static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
+ SDValue Op = N->getOperand(0);
+ if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL)
+ return SDValue();
+
+ SDValue ConstVec = Op->getOperand(1);
+ if (!isa<BuildVectorSDNode>(ConstVec))
+ return SDValue();
+
+ MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
+ uint32_t FloatBits = FloatTy.getSizeInBits();
+ MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
+ uint32_t IntBits = IntTy.getSizeInBits();
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
+ // These instructions only exist converting from f32 to i32. We can handle
+ // smaller integers by generating an extra truncate, but larger ones would
+ // be lossy. We also can't handle more then 4 lanes, since these intructions
+ // only support v2i32/v4i32 types.
+ return SDValue();
+ }
+
+ BitVector UndefElements;
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+ int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+ if (C == -1 || C == 0 || C > 32)
+ return SDValue();
+
+ SDLoc dl(N);
+ bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
+ Intrinsic::arm_neon_vcvtfp2fxu;
+ SDValue FixConv = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+ DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
+ DAG.getConstant(C, dl, MVT::i32));
+
+ if (IntBits < FloatBits)
+ FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
+
+ return FixConv;
+}
+
+/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
+/// can replace combinations of VCVT (integer to floating-point) and VDIV
+/// when the VDIV has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vcvt.f32.s32 d16, d16
+/// vdiv.f32 d16, d17, d16
+/// becomes:
+/// vcvt.f32.s32 d16, d16, #3
+static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
+ SDValue Op = N->getOperand(0);
+ unsigned OpOpcode = Op.getNode()->getOpcode();
+ if (!N->getValueType(0).isVector() ||
+ (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
+ return SDValue();
+
+ SDValue ConstVec = N->getOperand(1);
+ if (!isa<BuildVectorSDNode>(ConstVec))
+ return SDValue();
+
+ MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+ uint32_t FloatBits = FloatTy.getSizeInBits();
+ MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
+ uint32_t IntBits = IntTy.getSizeInBits();
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
+ // These instructions only exist converting from i32 to f32. We can handle
+ // smaller integers by generating an extra extend, but larger ones would
+ // be lossy. We also can't handle more then 4 lanes, since these intructions
+ // only support v2i32/v4i32 types.
+ return SDValue();
+ }
+
+ BitVector UndefElements;
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+ int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+ if (C == -1 || C == 0 || C > 32)
+ return SDValue();
+
+ SDLoc dl(N);
+ bool isSigned = OpOpcode == ISD::SINT_TO_FP;
+ SDValue ConvInput = Op.getOperand(0);
+ if (IntBits < FloatBits)
+ ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+ ConvInput);
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
+ Intrinsic::arm_neon_vcvtfxu2fp;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
+ Op.getValueType(),
+ DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
+ ConvInput, DAG.getConstant(C, dl, MVT::i32));
+}
+
+/// Getvshiftimm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
+ HasAnyUndefs, ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
+ Cnt = SplatBits.getSExtValue();
+ return true;
+}
+
+/// isVShiftLImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift left operation. That value must be in the range:
+/// 0 <= Value < ElementBits for a left shift; or
+/// 0 <= Value <= ElementBits for a long left shift.
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (! getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
+}
+
+/// isVShiftRImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift right operation. For a shift opcode, the value
+/// is positive, but for an intrinsic the value count must be negative. The
+/// absolute value must be in the range:
+/// 1 <= |Value| <= ElementBits for a right shift; or
+/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
+ int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (! getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ if (!isIntrinsic)
+ return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+ if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
+ Cnt = -Cnt;
+ return true;
+ }
+ return false;
+}
+
+/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
+static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ // Don't do anything for most intrinsics.
+ break;
+
+ // Vector shifts: check for immediate versions and lower them.
+ // Note: This is done during DAG combining instead of DAG legalizing because
+ // the build_vectors for 64-bit vector element shift counts are generally
+ // not legal, and it is hard to see their values after they get legalized to
+ // loads from a constant pool.
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ case Intrinsic::arm_neon_vrshifts:
+ case Intrinsic::arm_neon_vrshiftu:
+ case Intrinsic::arm_neon_vrshiftn:
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ case Intrinsic::arm_neon_vqshiftsu:
+ case Intrinsic::arm_neon_vqshiftns:
+ case Intrinsic::arm_neon_vqshiftnu:
+ case Intrinsic::arm_neon_vqshiftnsu:
+ case Intrinsic::arm_neon_vqrshiftns:
+ case Intrinsic::arm_neon_vqrshiftnu:
+ case Intrinsic::arm_neon_vqrshiftnsu: {
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ unsigned VShiftOpc = 0;
+
+ switch (IntNo) {
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
+ VShiftOpc = ARMISD::VSHL;
+ break;
+ }
+ if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
+ ARMISD::VSHRs : ARMISD::VSHRu);
+ break;
+ }
+ return SDValue();
+
+ case Intrinsic::arm_neon_vrshifts:
+ case Intrinsic::arm_neon_vrshiftu:
+ if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
+ break;
+ return SDValue();
+
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+ break;
+ return SDValue();
+
+ case Intrinsic::arm_neon_vqshiftsu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for vqshlu intrinsic");
+
+ case Intrinsic::arm_neon_vrshiftn:
+ case Intrinsic::arm_neon_vqshiftns:
+ case Intrinsic::arm_neon_vqshiftnu:
+ case Intrinsic::arm_neon_vqshiftnsu:
+ case Intrinsic::arm_neon_vqrshiftns:
+ case Intrinsic::arm_neon_vqrshiftnu:
+ case Intrinsic::arm_neon_vqrshiftnsu:
+ // Narrowing shifts require an immediate right shift.
+ if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for narrowing vector shift "
+ "intrinsic");
+
+ default:
+ llvm_unreachable("unhandled vector shift");
+ }
+
+ switch (IntNo) {
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ // Opcode already set above.
+ break;
+ case Intrinsic::arm_neon_vrshifts:
+ VShiftOpc = ARMISD::VRSHRs; break;
+ case Intrinsic::arm_neon_vrshiftu:
+ VShiftOpc = ARMISD::VRSHRu; break;
+ case Intrinsic::arm_neon_vrshiftn:
+ VShiftOpc = ARMISD::VRSHRN; break;
+ case Intrinsic::arm_neon_vqshifts:
+ VShiftOpc = ARMISD::VQSHLs; break;
+ case Intrinsic::arm_neon_vqshiftu:
+ VShiftOpc = ARMISD::VQSHLu; break;
+ case Intrinsic::arm_neon_vqshiftsu:
+ VShiftOpc = ARMISD::VQSHLsu; break;
+ case Intrinsic::arm_neon_vqshiftns:
+ VShiftOpc = ARMISD::VQSHRNs; break;
+ case Intrinsic::arm_neon_vqshiftnu:
+ VShiftOpc = ARMISD::VQSHRNu; break;
+ case Intrinsic::arm_neon_vqshiftnsu:
+ VShiftOpc = ARMISD::VQSHRNsu; break;
+ case Intrinsic::arm_neon_vqrshiftns:
+ VShiftOpc = ARMISD::VQRSHRNs; break;
+ case Intrinsic::arm_neon_vqrshiftnu:
+ VShiftOpc = ARMISD::VQRSHRNu; break;
+ case Intrinsic::arm_neon_vqrshiftnsu:
+ VShiftOpc = ARMISD::VQRSHRNsu; break;
+ }
+
+ SDLoc dl(N);
+ return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
+ N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
+ }
+
+ case Intrinsic::arm_neon_vshiftins: {
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ unsigned VShiftOpc = 0;
+
+ if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
+ VShiftOpc = ARMISD::VSLI;
+ else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
+ VShiftOpc = ARMISD::VSRI;
+ else {
+ llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
+ }
+
+ SDLoc dl(N);
+ return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
+ N->getOperand(1), N->getOperand(2),
+ DAG.getConstant(Cnt, dl, MVT::i32));
+ }
+
+ case Intrinsic::arm_neon_vqrshifts:
+ case Intrinsic::arm_neon_vqrshiftu:
+ // No immediate versions of these to check for.
+ break;
+ }
+
+ return SDValue();
+}
+
+/// PerformShiftCombine - Checks for immediate versions of vector shifts and
+/// lowers them. As with the vector shift intrinsics, this is done during DAG
+/// combining instead of DAG legalizing because the build_vectors for 64-bit
+/// vector element shift counts are generally not legal, and it is hard to see
+/// their values after they get legalized to loads from a constant pool.
+static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
+ // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
+ // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
+ SDValue N1 = N->getOperand(1);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ SDValue N0 = N->getOperand(0);
+ if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
+ DAG.MaskedValueIsZero(N0.getOperand(0),
+ APInt::getHighBitsSet(32, 16)))
+ return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
+ }
+ }
+
+ // Nothing to be done for scalar shifts.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!VT.isVector() || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ assert(ST->hasNEON() && "unexpected vector shift");
+ int64_t Cnt;
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected shift opcode");
+
+ case ISD::SHL:
+ if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
+ SDLoc dl(N);
+ return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
+ DAG.getConstant(Cnt, dl, MVT::i32));
+ }
+ break;
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
+ unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
+ ARMISD::VSHRs : ARMISD::VSHRu);
+ SDLoc dl(N);
+ return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
+ DAG.getConstant(Cnt, dl, MVT::i32));
+ }
+ }
+ return SDValue();
+}
+
+/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
+/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
+static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDValue N0 = N->getOperand(0);
+
+ // Check for sign- and zero-extensions of vector extract operations of 8-
+ // and 16-bit vector elements. NEON supports these directly. They are
+ // handled during DAG combining because type legalization will promote them
+ // to 32-bit types and it is messy to recognize the operations after that.
+ if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue Vec = N0.getOperand(0);
+ SDValue Lane = N0.getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EltVT = N0.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (VT == MVT::i32 &&
+ (EltVT == MVT::i8 || EltVT == MVT::i16) &&
+ TLI.isTypeLegal(Vec.getValueType()) &&
+ isa<ConstantSDNode>(Lane)) {
+
+ unsigned Opc = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ISD::SIGN_EXTEND:
+ Opc = ARMISD::VGETLANEs;
+ break;
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Opc = ARMISD::VGETLANEu;
+ break;
+ }
+ return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
+ }
+ }
+
+ return SDValue();
+}
+
+static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
+ APInt &KnownOne) {
+ if (Op.getOpcode() == ARMISD::BFI) {
+ // Conservatively, we can recurse down the first operand
+ // and just mask out all affected bits.
+ computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
+
+ // The operand to BFI is already a mask suitable for removing the bits it
+ // sets.
+ ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
+ APInt Mask = CI->getAPIntValue();
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+ return;
+ }
+ if (Op.getOpcode() == ARMISD::CMOV) {
+ APInt KZ2(KnownZero.getBitWidth(), 0);
+ APInt KO2(KnownOne.getBitWidth(), 0);
+ computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
+ computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);
+
+ KnownZero &= KZ2;
+ KnownOne &= KO2;
+ return;
+ }
+ return DAG.computeKnownBits(Op, KnownZero, KnownOne);
+}
+
+SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
+ // If we have a CMOV, OR and AND combination such as:
+ // if (x & CN)
+ // y |= CM;
+ //
+ // And:
+ // * CN is a single bit;
+ // * All bits covered by CM are known zero in y
+ //
+ // Then we can convert this into a sequence of BFI instructions. This will
+ // always be a win if CM is a single bit, will always be no worse than the
+ // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
+ // three bits (due to the extra IT instruction).
+
+ SDValue Op0 = CMOV->getOperand(0);
+ SDValue Op1 = CMOV->getOperand(1);
+ auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
+ auto CC = CCNode->getAPIntValue().getLimitedValue();
+ SDValue CmpZ = CMOV->getOperand(4);
+
+ // The compare must be against zero.
+ if (!isNullConstant(CmpZ->getOperand(1)))
+ return SDValue();
+
+ assert(CmpZ->getOpcode() == ARMISD::CMPZ);
+ SDValue And = CmpZ->getOperand(0);
+ if (And->getOpcode() != ISD::AND)
+ return SDValue();
+ ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
+ if (!AndC || !AndC->getAPIntValue().isPowerOf2())
+ return SDValue();
+ SDValue X = And->getOperand(0);
+
+ if (CC == ARMCC::EQ) {
+ // We're performing an "equal to zero" compare. Swap the operands so we
+ // canonicalize on a "not equal to zero" compare.
+ std::swap(Op0, Op1);
+ } else {
+ assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
+ }
+
+ if (Op1->getOpcode() != ISD::OR)
+ return SDValue();
+
+ ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
+ if (!OrC)
+ return SDValue();
+ SDValue Y = Op1->getOperand(0);
+
+ if (Op0 != Y)
+ return SDValue();
+
+ // Now, is it profitable to continue?
+ APInt OrCI = OrC->getAPIntValue();
+ unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
+ if (OrCI.countPopulation() > Heuristic)
+ return SDValue();
+
+ // Lastly, can we determine that the bits defined by OrCI
+ // are zero in Y?
+ APInt KnownZero, KnownOne;
+ computeKnownBits(DAG, Y, KnownZero, KnownOne);
+ if ((OrCI & KnownZero) != OrCI)
+ return SDValue();
+
+ // OK, we can do the combine.
+ SDValue V = Y;
+ SDLoc dl(X);
+ EVT VT = X.getValueType();
+ unsigned BitInX = AndC->getAPIntValue().logBase2();
+
+ if (BitInX != 0) {
+ // We must shift X first.
+ X = DAG.getNode(ISD::SRL, dl, VT, X,
+ DAG.getConstant(BitInX, dl, VT));
+ }
+
+ for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
+ BitInY < NumActiveBits; ++BitInY) {
+ if (OrCI[BitInY] == 0)
+ continue;
+ APInt Mask(VT.getSizeInBits(), 0);
+ Mask.setBit(BitInY);
+ V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
+ // Confusingly, the operand is an *inverted* mask.
+ DAG.getConstant(~Mask, dl, VT));
+ }
+
+ return V;
+}
+
+/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
+SDValue
+ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+ SDValue Cmp = N->getOperand(4);
+ if (Cmp.getOpcode() != ARMISD::CMPZ)
+ // Only looking at EQ and NE cases.
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue LHS = Cmp.getOperand(0);
+ SDValue RHS = Cmp.getOperand(1);
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue ARMcc = N->getOperand(2);
+ ARMCC::CondCodes CC =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+ // BFI is only available on V6T2+.
+ if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
+ SDValue R = PerformCMOVToBFICombine(N, DAG);
+ if (R)
+ return R;
+ }
+
+ // Simplify
+ // mov r1, r0
+ // cmp r1, x
+ // mov r0, y
+ // moveq r0, x
+ // to
+ // cmp r0, x
+ // movne r0, y
+ //
+ // mov r1, r0
+ // cmp r1, x
+ // mov r0, x
+ // movne r0, y
+ // to
+ // cmp r0, x
+ // movne r0, y
+ /// FIXME: Turn this into a target neutral optimization?
+ SDValue Res;
+ if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
+ N->getOperand(3), Cmp);
+ } else if (CC == ARMCC::EQ && TrueVal == RHS) {
+ SDValue ARMcc;
+ SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
+ N->getOperand(3), NewCmp);
+ }
+
+ if (Res.getNode()) {
+ APInt KnownZero, KnownOne;
+ DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
+ // Capture demanded bits information that would be otherwise lost.
+ if (KnownZero == 0xfffffffe)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i1));
+ else if (KnownZero == 0xffffff00)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i8));
+ else if (KnownZero == 0xffff0000)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i16));
+ }
+
+ return Res;
+}
+
+SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget);
+ case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+ case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
+ case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
+ case ARMISD::BFI: return PerformBFICombine(N, DCI);
+ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
+ case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
+ case ISD::STORE: return PerformSTORECombine(N, DCI);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
+ case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
+ case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
+ case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return PerformVCVTCombine(N, DCI.DAG, Subtarget);
+ case ISD::FDIV:
+ return PerformVDIVCombine(N, DCI.DAG, Subtarget);
+ case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+ case ISD::LOAD: return PerformLOADCombine(N, DCI);
+ case ARMISD::VLD2DUP:
+ case ARMISD::VLD3DUP:
+ case ARMISD::VLD4DUP:
+ return PerformVLDCombine(N, DCI);
+ case ARMISD::BUILD_VECTOR:
+ return PerformARMBUILD_VECTORCombine(N, DCI);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane:
+ return PerformVLDCombine(N, DCI);
+ default: break;
+ }
+ break;
+ }
+ return SDValue();
+}
+
+bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
+ EVT VT) const {
+ return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
+}
+
+bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
+ // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus
+ bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32: {
+ // Unaligned access can use (for example) LRDB, LRDH, LDR
+ if (AllowsUnaligned) {
+ if (Fast)
+ *Fast = Subtarget->hasV7Ops();
+ return true;
+ }
+ return false;
+ }
+ case MVT::f64:
+ case MVT::v2f64: {
+ // For any little-endian targets with neon, we can support unaligned ld/st
+ // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
+ // A big-endian target may also explicitly support unaligned accesses
+ if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
+ return false;
+ }
+ }
+}
+
+static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
+ unsigned AlignCheck) {
+ return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
+ (DstAlign == 0 || DstAlign % AlignCheck == 0));
+}
+
+EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+
+ // See if we can use NEON instructions for this...
+ if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ bool Fast;
+ if (Size >= 16 &&
+ (memOpAlign(SrcAlign, DstAlign, 16) ||
+ (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
+ return MVT::v2f64;
+ } else if (Size >= 8 &&
+ (memOpAlign(SrcAlign, DstAlign, 8) ||
+ (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
+ Fast))) {
+ return MVT::f64;
+ }
+ }
+
+ // Lowering to i32/i16 if the size permits.
+ if (Size >= 4)
+ return MVT::i32;
+ else if (Size >= 2)
+ return MVT::i16;
+
+ // Let the target-independent logic figure it out.
+ return MVT::Other;
+}
+
+bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ if (Val.getOpcode() != ISD::LOAD)
+ return false;
+
+ EVT VT1 = Val.getValueType();
+ if (!VT1.isSimple() || !VT1.isInteger() ||
+ !VT2.isSimple() || !VT2.isInteger())
+ return false;
+
+ switch (VT1.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
+ EVT VT = ExtVal.getValueType();
+
+ if (!isTypeLegal(VT))
+ return false;
+
+ // Don't create a loadext if we can fold the extension into a wide/long
+ // instruction.
+ // If there's more than one user instruction, the loadext is desirable no
+ // matter what. There can be two uses by the same instruction.
+ if (ExtVal->use_empty() ||
+ !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
+ return true;
+
+ SDNode *U = *ExtVal->use_begin();
+ if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
+ U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
+ return false;
+
+ return true;
+}
+
+bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+
+ if (!isTypeLegal(EVT::getEVT(Ty1)))
+ return false;
+
+ assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
+
+ // Assuming the caller doesn't have a zeroext or signext return parameter,
+ // truncation all the way down to i1 is valid.
+ return true;
+}
+
+
+static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
+ if (V < 0)
+ return false;
+
+ unsigned Scale = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ // Scale == 1;
+ break;
+ case MVT::i16:
+ // Scale == 2;
+ Scale = 2;
+ break;
+ case MVT::i32:
+ // Scale == 4;
+ Scale = 4;
+ break;
+ }
+
+ if ((V & (Scale - 1)) != 0)
+ return false;
+ V /= Scale;
+ return V == (V & ((1LL << 5) - 1));
+}
+
+static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ bool isNeg = false;
+ if (V < 0) {
+ isNeg = true;
+ V = - V;
+ }
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // + imm12 or - imm8
+ if (isNeg)
+ return V == (V & ((1LL << 8) - 1));
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ // Same as ARM mode. FIXME: NEON?
+ if (!Subtarget->hasVFP2())
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+static bool isLegalAddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ if (V == 0)
+ return true;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb1Only())
+ return isLegalT1AddressImmediate(V, VT);
+ else if (Subtarget->isThumb2())
+ return isLegalT2AddressImmediate(V, VT, Subtarget);
+
+ // ARM mode.
+ if (V < 0)
+ V = - V;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ // +- imm12
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::i16:
+ // +- imm8
+ return V == (V & ((1LL << 8) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) // FIXME: NEON?
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
+ EVT VT) const {
+ int Scale = AM.Scale;
+ if (Scale < 0)
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ Scale = Scale & ~1;
+ return Scale == 2 || Scale == 4 || Scale == 8;
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because arm allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS) const {
+ EVT VT = getValueType(DL, Ty, true);
+ if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
+ return false;
+
+ // Can never fold addr of global into load/store.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // no scale reg, must be "r+i" or "r", or "i".
+ break;
+ case 1:
+ if (Subtarget->isThumb1Only())
+ return false;
+ // FALL THROUGH.
+ default:
+ // ARM doesn't support any R+R*scale+imm addr modes.
+ if (AM.BaseOffs)
+ return false;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb2())
+ return isLegalT2ScaledAddressingMode(AM, VT);
+
+ int Scale = AM.Scale;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ if (Scale < 0) Scale = -Scale;
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ return isPowerOf2_32(Scale & ~1);
+ case MVT::i16:
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because arm allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+ }
+ return true;
+}
+
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ // Thumb2 and ARM modes can use cmn for negative immediates.
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(std::abs(Imm)) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1;
+ // Thumb1 doesn't have cmn, and only 8-bit immediates.
+ return Imm >= 0 && Imm <= 255;
+}
+
+/// isLegalAddImmediate - Return true if the specified immediate is a legal add
+/// *or sub* immediate, that is the target has add or sub instructions which can
+/// add a register with the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ // Same encoding for add/sub, just flip the sign.
+ int64_t AbsImm = std::abs(Imm);
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(AbsImm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(AbsImm) != -1;
+ // Thumb1 only has 8-bit unsigned immediate.
+ return AbsImm >= 0 && AbsImm <= 255;
+}
+
+static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
+ // AddressingMode 3
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -256) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
+ return true;
+ }
+ }
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Offset = Ptr->getOperand(1);
+ return true;
+ } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
+ // AddressingMode 2
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x1000) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
+ Base = Ptr->getOperand(0);
+ return true;
+ }
+ }
+
+ if (Ptr->getOpcode() == ISD::ADD) {
+ isInc = true;
+ ARM_AM::ShiftOpc ShOpcVal=
+ ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
+ if (ShOpcVal != ARM_AM::no_shift) {
+ Base = Ptr->getOperand(1);
+ Offset = Ptr->getOperand(0);
+ } else {
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ }
+ return true;
+ }
+
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ return true;
+ }
+
+ // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
+ return false;
+}
+
+static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
+ return true;
+ } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
+ isInc = Ptr->getOpcode() == ISD::ADD;
+ Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool
+ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = false;
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ if (!isLegal)
+ return false;
+
+ AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+ return true;
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = false;
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (!isLegal)
+ return false;
+
+ if (Ptr != Base) {
+ // Swap base ptr and offset to catch more post-index load / store when
+ // it's legal. In Thumb2 mode, offset must be an immediate.
+ if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
+ !Subtarget->isThumb2())
+ std::swap(Base, Offset);
+
+ // Post-indexed load / store update the base pointer.
+ if (Ptr != Base)
+ return false;
+ }
+
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+}
+
+void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ unsigned BitWidth = KnownOne.getBitWidth();
+ KnownZero = KnownOne = APInt(BitWidth, 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case ARMISD::ADDC:
+ case ARMISD::ADDE:
+ case ARMISD::SUBC:
+ case ARMISD::SUBE:
+ // These nodes' second result is a boolean
+ if (Op.getResNo() == 0)
+ break;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ break;
+ case ARMISD::CMOV: {
+ // Bits are known zero/one if known on the LHS and RHS.
+ DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ if (KnownZero == 0 && KnownOne == 0) return;
+
+ APInt KnownZeroRHS, KnownOneRHS;
+ DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
+ KnownZero &= KnownZeroRHS;
+ KnownOne &= KnownOneRHS;
+ return;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
+ Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
+ switch (IntID) {
+ default: return;
+ case Intrinsic::arm_ldaex:
+ case Intrinsic::arm_ldrex: {
+ EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
+ unsigned MemBits = VT.getScalarType().getSizeInBits();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ return;
+ }
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ // Looking for "rev" which is V6+.
+ if (!Subtarget->hasV6Ops())
+ return false;
+
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ std::string AsmStr = IA->getAsmString();
+ SmallVector<StringRef, 4> AsmPieces;
+ SplitString(AsmStr, AsmPieces, ";\n");
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t,");
+
+ // rev $0, $1
+ if (AsmPieces.size() == 3 &&
+ AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
+ IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (Ty && Ty->getBitWidth() == 32)
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ break;
+ }
+
+ return false;
+}
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+ARMTargetLowering::ConstraintType
+ARMTargetLowering::getConstraintType(StringRef Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'l': return C_RegisterClass;
+ case 'w': return C_RegisterClass;
+ case 'h': return C_RegisterClass;
+ case 'x': return C_RegisterClass;
+ case 't': return C_RegisterClass;
+ case 'j': return C_Other; // Constant for movw.
+ // An address with a single base register. Due to the way we
+ // currently handle addresses it is the same as an 'r' memory constraint.
+ case 'Q': return C_Memory;
+ }
+ } else if (Constraint.size() == 2) {
+ switch (Constraint[0]) {
+ default: break;
+ // All 'U+' constraints are addresses.
+ case 'U': return C_Memory;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ARMTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (!CallOperandVal)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'l':
+ if (type->isIntegerTy()) {
+ if (Subtarget->isThumb())
+ weight = CW_SpecificReg;
+ else
+ weight = CW_Register;
+ }
+ break;
+ case 'w':
+ if (type->isFloatingPointTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
+RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC ARM Constraint Letters
+ switch (Constraint[0]) {
+ case 'l': // Low regs or general regs.
+ if (Subtarget->isThumb())
+ return RCPair(0U, &ARM::tGPRRegClass);
+ return RCPair(0U, &ARM::GPRRegClass);
+ case 'h': // High regs or no regs.
+ if (Subtarget->isThumb())
+ return RCPair(0U, &ARM::hGPRRegClass);
+ break;
+ case 'r':
+ if (Subtarget->isThumb1Only())
+ return RCPair(0U, &ARM::tGPRRegClass);
+ return RCPair(0U, &ARM::GPRRegClass);
+ case 'w':
+ if (VT == MVT::Other)
+ break;
+ if (VT == MVT::f32)
+ return RCPair(0U, &ARM::SPRRegClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, &ARM::DPRRegClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, &ARM::QPRRegClass);
+ break;
+ case 'x':
+ if (VT == MVT::Other)
+ break;
+ if (VT == MVT::f32)
+ return RCPair(0U, &ARM::SPR_8RegClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, &ARM::DPR_8RegClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, &ARM::QPR_8RegClass);
+ break;
+ case 't':
+ if (VT == MVT::f32)
+ return RCPair(0U, &ARM::SPRRegClass);
+ break;
+ }
+ }
+ if (StringRef("{cc}").equals_lower(Constraint))
+ return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
+
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result;
+
+ // Currently only support length 1 constraints.
+ if (Constraint.length() != 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'j':
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O':
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ int64_t CVal64 = C->getSExtValue();
+ int CVal = (int) CVal64;
+ // None of these constraints allow values larger than 32 bits. Check
+ // that the value fits in an int.
+ if (CVal != CVal64)
+ return;
+
+ switch (ConstraintLetter) {
+ case 'j':
+ // Constant suitable for movw, must be between 0 and
+ // 65535.
+ if (Subtarget->hasV6T2Ops())
+ if (CVal >= 0 && CVal <= 65535)
+ break;
+ return;
+ case 'I':
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between 0 and 255, for ADD
+ // immediates.
+ if (CVal >= 0 && CVal <= 255)
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getT2SOImmVal(CVal) != -1)
+ break;
+ } else {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getSOImmVal(CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'J':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a constant between -255 and -1, for negated ADD
+ // immediates. This can be used in GCC with an "n" modifier that
+ // prints the negated value, for use with SUB instructions. It is
+ // not useful otherwise but is implemented for compatibility.
+ if (CVal >= -255 && CVal <= -1)
+ break;
+ } else {
+ // This must be a constant between -4095 and 4095. It is not clear
+ // what this constraint is intended for. Implemented for
+ // compatibility with GCC.
+ if (CVal >= -4095 && CVal <= 4095)
+ break;
+ }
+ return;
+
+ case 'K':
+ if (Subtarget->isThumb1Only()) {
+ // A 32-bit value where only one byte has a nonzero value. Exclude
+ // zero to match GCC. This constraint is used by GCC internally for
+ // constants that can be loaded with a move/shift combination.
+ // It is not useful otherwise but is implemented for compatibility.
+ if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getT2SOImmVal(~CVal) != -1)
+ break;
+ } else {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getSOImmVal(~CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'L':
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between -7 and 7,
+ // for 3-operand ADD/SUB immediate instructions.
+ if (CVal >= -7 && CVal < 7)
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getT2SOImmVal(-CVal) != -1)
+ break;
+ } else {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getSOImmVal(-CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'M':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a multiple of 4 between 0 and 1020, for
+ // ADD sp + immediate.
+ if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
+ break;
+ } else {
+ // A power of two or a constant between 0 and 32. This is used in
+ // GCC for the shift amount on shifted register operands, but it is
+ // useful in general for any shift amounts.
+ if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
+ break;
+ }
+ return;
+
+ case 'N':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a constant between 0 and 31, for shift amounts.
+ if (CVal >= 0 && CVal <= 31)
+ break;
+ }
+ return;
+
+ case 'O':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a multiple of 4 between -508 and 508, for
+ // ADD/SUB sp = sp + immediate.
+ if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
+ break;
+ }
+ return;
+ }
+ Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
+ break;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+static RTLIB::Libcall getDivRemLibcall(
+ const SDNode *N, MVT::SimpleValueType SVT) {
+ assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
+ N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
+ "Unhandled Opcode in getDivRemLibcall");
+ bool isSigned = N->getOpcode() == ISD::SDIVREM ||
+ N->getOpcode() == ISD::SREM;
+ RTLIB::Libcall LC;
+ switch (SVT) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ }
+ return LC;
+}
+
+static TargetLowering::ArgListTy getDivRemArgList(
+ const SDNode *N, LLVMContext *Context) {
+ assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
+ N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
+ "Unhandled Opcode in getDivRemArgList");
+ bool isSigned = N->getOpcode() == ISD::SDIVREM ||
+ N->getOpcode() == ISD::SREM;
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = N->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*Context);
+ Entry.Node = N->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ return Args;
+}
+
+SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
+ assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) &&
+ "Register-based DivRem lowering only");
+ unsigned Opcode = Op->getOpcode();
+ assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
+ "Invalid opcode for Div/Rem lowering");
+ bool isSigned = (Opcode == ISD::SDIVREM);
+ EVT VT = Op->getValueType(0);
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
+ RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
+ VT.getSimpleVT().SimpleTy);
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
+ DAG.getContext());
+
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy(DAG.getDataLayout()));
+
+ Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);
+
+ SDLoc dl(Op);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(InChain)
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
+ .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
+
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+ return CallInfo.first;
+}
+
+// Lowers REM using divmod helpers
+// see RTABI section 4.2/4.3
+SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
+ // Build return types (div and rem)
+ std::vector<Type*> RetTyParams;
+ Type *RetTyElement;
+
+ switch (N->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
+ case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
+ case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
+ case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
+ }
+
+ RetTyParams.push_back(RetTyElement);
+ RetTyParams.push_back(RetTyElement);
+ ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
+ Type *RetTy = StructType::get(*DAG.getContext(), ret);
+
+ RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
+ SimpleTy);
+ SDValue InChain = DAG.getEntryNode();
+ TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext());
+ bool isSigned = N->getOpcode() == ISD::SREM;
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy(DAG.getDataLayout()));
+
+ // Lower call
+ CallLoweringInfo CLI(DAG);
+ CLI.setChain(InChain)
+ .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args), 0)
+ .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ // Return second (rem) result operand (first contains div)
+ SDNode *ResNode = CallResult.first.getNode();
+ assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
+ return ResNode->getOperand(1);
+}
+
+SDValue
+ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWindows() && "unsupported target platform");
+ SDLoc DL(Op);
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+
+ SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
+ DAG.getConstant(2, DL, MVT::i32));
+
+ SDValue Flag;
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
+ Flag = Chain.getValue(1);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
+
+ SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
+ Chain = NewSP.getValue(1);
+
+ SDValue Ops[2] = { NewSP, Chain };
+ return DAG.getMergeValues(Ops, DL);
+}
+
+SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
+ "Unexpected type for custom-lowering FP_EXTEND");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+ SDLoc(Op)).first;
+}
+
+SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getOperand(0).getValueType() == MVT::f64 &&
+ Subtarget->isFPOnlySP() &&
+ "Unexpected type for custom-lowering FP_ROUND");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+ SDLoc(Op)).first;
+}
+
+bool
+ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The ARM target isn't yet aware of offsets.
+ return false;
+}
+
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+ if (v == 0xffffffff)
+ return false;
+
+ // there can be 1's on either or both "outsides", all the "inside"
+ // bits must be 0's
+ return isShiftedMask_32(~v);
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (!Subtarget->hasVFP3())
+ return false;
+ if (VT == MVT::f32)
+ return ARM_AM::getFP32Imm(Imm) != -1;
+ if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
+ return ARM_AM::getFP64Imm(Imm) != -1;
+ return false;
+}
+
+/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
+/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
+/// specified in the intrinsic calls.
+bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ // Conservatively set memVT to the entire set of vectors loaded.
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
+ uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile loads with NEON intrinsics not supported
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Conservatively set memVT to the entire set of vectors stored.
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
+ unsigned NumElts = 0;
+ for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ if (!ArgTy->isVectorTy())
+ break;
+ NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
+ }
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile stores with NEON intrinsics not supported
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::arm_ldaex:
+ case Intrinsic::arm_ldrex: {
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
+ Info.vol = true;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_stlex:
+ case Intrinsic::arm_strex: {
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = 0;
+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
+ Info.vol = true;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::arm_stlexd:
+ case Intrinsic::arm_strexd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(2);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.vol = true;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::arm_ldaexd:
+ case Intrinsic::arm_ldrexd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.vol = true;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/// \brief Returns true if it is beneficial to convert a load of a constant
+/// to just the constant itself.
+bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned Bits = Ty->getPrimitiveSizeInBits();
+ if (Bits == 0 || Bits > 32)
+ return false;
+ return true;
+}
+
+Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
+ ARM_MB::MemBOpt Domain) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+
+ // First, if the target has no DMB, see what fallback we can use.
+ if (!Subtarget->hasDataBarrier()) {
+ // Some ARMv6 cpus can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
+ Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
+ Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
+ Builder.getInt32(0), Builder.getInt32(7),
+ Builder.getInt32(10), Builder.getInt32(5)};
+ return Builder.CreateCall(MCR, args);
+ } else {
+ // Instead of using barriers, atomic accesses on these subtargets use
+ // libcalls.
+ llvm_unreachable("makeDMB on a target so old that it has no barriers");
+ }
+ } else {
+ Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
+ // Only a full system barrier exists in the M-class architectures.
+ Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
+ Constant *CDomain = Builder.getInt32(Domain);
+ return Builder.CreateCall(DMB, CDomain);
+ }
+}
+
+// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (!getInsertFencesForAtomic())
+ return nullptr;
+
+ switch (Ord) {
+ case NotAtomic:
+ case Unordered:
+ llvm_unreachable("Invalid fence: unordered/non-atomic");
+ case Monotonic:
+ case Acquire:
+ return nullptr; // Nothing to do
+ case SequentiallyConsistent:
+ if (!IsStore)
+ return nullptr; // Nothing to do
+ /*FALLTHROUGH*/
+ case Release:
+ case AcquireRelease:
+ if (Subtarget->isSwift())
+ return makeDMB(Builder, ARM_MB::ISHST);
+ // FIXME: add a comment with a link to documentation justifying this.
+ else
+ return makeDMB(Builder, ARM_MB::ISH);
+ }
+ llvm_unreachable("Unknown fence ordering in emitLeadingFence");
+}
+
+Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (!getInsertFencesForAtomic())
+ return nullptr;
+
+ switch (Ord) {
+ case NotAtomic:
+ case Unordered:
+ llvm_unreachable("Invalid fence: unordered/not-atomic");
+ case Monotonic:
+ case Release:
+ return nullptr; // Nothing to do
+ case Acquire:
+ case AcquireRelease:
+ case SequentiallyConsistent:
+ return makeDMB(Builder, ARM_MB::ISH);
+ }
+ llvm_unreachable("Unknown fence ordering in emitTrailingFence");
+}
+
+// Loads and stores less than 64-bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
+// anything for those.
+bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+ return (Size == 64) && !Subtarget->isMClass();
+}
+
+// Loads and stores less than 64-bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
+// anything for those.
+// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
+// guarantee, see DDI0406C ARM architecture reference manual,
+// sections A8.8.72-74 LDRD)
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ unsigned Size = LI->getType()->getPrimitiveSizeInBits();
+ return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
+ : AtomicExpansionKind::None;
+}
+
+// For the real atomic operations, we have ldrex/strex up to 32 bits,
+// and up to 64 bits on the non-M profiles
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+ return (Size <= (Subtarget->isMClass() ? 32U : 64U))
+ ? AtomicExpansionKind::LLSC
+ : AtomicExpansionKind::None;
+}
+
+bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *AI) const {
+ return true;
+}
+
+// This has so far only been implemented for MachO.
+bool ARMTargetLowering::useLoadStackGuardNode() const {
+ return Subtarget->isTargetMachO();
+}
+
+bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+ unsigned &Cost) const {
+ // If we do not have NEON, vector types are not natively supported.
+ if (!Subtarget->hasNEON())
+ return false;
+
+ // Floating point values and vector values map to the same register file.
+ // Therefore, although we could do a store extract of a vector type, this is
+ // better to leave at float as we have more freedom in the addressing mode for
+ // those.
+ if (VectorTy->isFPOrFPVectorTy())
+ return false;
+
+ // If the index is unknown at compile time, this is very expensive to lower
+ // and it is not possible to combine the store with the extract.
+ if (!isa<ConstantInt>(Idx))
+ return false;
+
+ assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
+ unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ // We can do a store + vector extract on any vector that fits perfectly in a D
+ // or Q register.
+ if (BitWidth == 64 || BitWidth == 128) {
+ Cost = 0;
+ return true;
+ }
+ return false;
+}
+
+bool ARMTargetLowering::isCheapToSpeculateCttz() const {
+ return Subtarget->hasV6T2Ops();
+}
+
+bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
+ return Subtarget->hasV6T2Ops();
+}
+
+Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
+ bool IsAcquire = isAtLeastAcquire(Ord);
+
+ // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
+ // intrinsic must return {i32, i32} and we have to recombine them into a
+ // single i64 here.
+ if (ValTy->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
+
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
+
+ Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+ Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
+ Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+ Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+ return Builder.CreateOr(
+ Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
+ }
+
+ Type *Tys[] = { Addr->getType() };
+ Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateTruncOrBitCast(
+ Builder.CreateCall(Ldrex, Addr),
+ cast<PointerType>(Addr->getType())->getElementType());
+}
+
+void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
+ IRBuilder<> &Builder) const {
+ if (!Subtarget->hasV7Ops())
+ return;
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
+}
+
+Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr,
+ AtomicOrdering Ord) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ bool IsRelease = isAtLeastRelease(Ord);
+
+ // Since the intrinsics must have legal type, the i64 intrinsics take two
+ // parameters: "i32, i32". We must marshal Val into the appropriate form
+ // before the call.
+ if (Val->getType()->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
+ Function *Strex = Intrinsic::getDeclaration(M, Int);
+ Type *Int32Ty = Type::getInt32Ty(M->getContext());
+
+ Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
+ Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ return Builder.CreateCall(Strex, {Lo, Hi, Addr});
+ }
+
+ Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
+ Type *Tys[] = { Addr->getType() };
+ Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateCall(
+ Strex, {Builder.CreateZExtOrBitCast(
+ Val, Strex->getFunctionType()->getParamType(0)),
+ Addr});
+}
+
+/// \brief Lower an interleaved load into a vldN intrinsic.
+///
+/// E.g. Lower an interleaved load (Factor = 2):
+/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
+/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
+/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
+///
+/// Into:
+/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
+/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
+/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
+bool ARMTargetLowering::lowerInterleavedLoad(
+ LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+ ArrayRef<unsigned> Indices, unsigned Factor) const {
+ assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+ "Invalid interleave factor");
+ assert(!Shuffles.empty() && "Empty shufflevector input");
+ assert(Shuffles.size() == Indices.size() &&
+ "Unmatched number of shufflevectors and indices");
+
+ VectorType *VecTy = Shuffles[0]->getType();
+ Type *EltTy = VecTy->getVectorElementType();
+
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
+
+ // Skip if we do not have NEON and skip illegal vector types and vector types
+ // with i64/f64 elements (vldN doesn't support i64/f64 elements).
+ if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits)
+ return false;
+
+ // A pointer vector can not be the return type of the ldN intrinsics. Need to
+ // load integer vectors first and then convert to pointer vectors.
+ if (EltTy->isPointerTy())
+ VecTy =
+ VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
+
+ static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
+ Intrinsic::arm_neon_vld3,
+ Intrinsic::arm_neon_vld4};
+
+ IRBuilder<> Builder(LI);
+ SmallVector<Value *, 2> Ops;
+
+ Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
+ Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
+ Ops.push_back(Builder.getInt32(LI->getAlignment()));
+
+ Type *Tys[] = { VecTy, Int8Ptr };
+ Function *VldnFunc =
+ Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
+ CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
+
+ // Replace uses of each shufflevector with the corresponding vector loaded
+ // by ldN.
+ for (unsigned i = 0; i < Shuffles.size(); i++) {
+ ShuffleVectorInst *SV = Shuffles[i];
+ unsigned Index = Indices[i];
+
+ Value *SubVec = Builder.CreateExtractValue(VldN, Index);
+
+ // Convert the integer vector to pointer vector if the element is pointer.
+ if (EltTy->isPointerTy())
+ SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
+
+ SV->replaceAllUsesWith(SubVec);
+ }
+
+ return true;
+}
+
+/// \brief Get a mask consisting of sequential integers starting from \p Start.
+///
+/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
+static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned NumElts) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < NumElts; i++)
+ Mask.push_back(Builder.getInt32(Start + i));
+
+ return ConstantVector::get(Mask);
+}
+
+/// \brief Lower an interleaved store into a vstN intrinsic.
+///
+/// E.g. Lower an interleaved store (Factor = 3):
+/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
+/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
+///
+/// Into:
+/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
+/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
+/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
+/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
+///
+/// Note that the new shufflevectors will be removed and we'll only generate one
+/// vst3 instruction in CodeGen.
+bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
+ ShuffleVectorInst *SVI,
+ unsigned Factor) const {
+ assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+ "Invalid interleave factor");
+
+ VectorType *VecTy = SVI->getType();
+ assert(VecTy->getVectorNumElements() % Factor == 0 &&
+ "Invalid interleaved store");
+
+ unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
+ Type *EltTy = VecTy->getVectorElementType();
+ VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
+
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+ bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
+
+ // Skip if we do not have NEON and skip illegal vector types and vector types
+ // with i64/f64 elements (vstN doesn't support i64/f64 elements).
+ if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) ||
+ EltIs64Bits)
+ return false;
+
+ Value *Op0 = SVI->getOperand(0);
+ Value *Op1 = SVI->getOperand(1);
+ IRBuilder<> Builder(SI);
+
+ // StN intrinsics don't support pointer vectors as arguments. Convert pointer
+ // vectors to integer vectors.
+ if (EltTy->isPointerTy()) {
+ Type *IntTy = DL.getIntPtrType(EltTy);
+
+ // Convert to the corresponding integer vector.
+ Type *IntVecTy =
+ VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
+ Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
+ Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
+
+ SubVecTy = VectorType::get(IntTy, NumSubElts);
+ }
+
+ static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
+ Intrinsic::arm_neon_vst3,
+ Intrinsic::arm_neon_vst4};
+ SmallVector<Value *, 6> Ops;
+
+ Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
+ Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
+
+ Type *Tys[] = { Int8Ptr, SubVecTy };
+ Function *VstNFunc = Intrinsic::getDeclaration(
+ SI->getModule(), StoreInts[Factor - 2], Tys);
+
+ // Split the shufflevector operands into sub vectors for the new vstN call.
+ for (unsigned i = 0; i < Factor; i++)
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
+
+ Ops.push_back(Builder.getInt32(SI->getAlignment()));
+ Builder.CreateCall(VstNFunc, Ops);
+ return true;
+}
+
+enum HABaseType {
+ HA_UNKNOWN = 0,
+ HA_FLOAT,
+ HA_DOUBLE,
+ HA_VECT64,
+ HA_VECT128
+};
+
+static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
+ uint64_t &Members) {
+ if (auto *ST = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0; i < ST->getNumElements(); ++i) {
+ uint64_t SubMembers = 0;
+ if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
+ return false;
+ Members += SubMembers;
+ }
+ } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t SubMembers = 0;
+ if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
+ return false;
+ Members += SubMembers * AT->getNumElements();
+ } else if (Ty->isFloatTy()) {
+ if (Base != HA_UNKNOWN && Base != HA_FLOAT)
+ return false;
+ Members = 1;
+ Base = HA_FLOAT;
+ } else if (Ty->isDoubleTy()) {
+ if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
+ return false;
+ Members = 1;
+ Base = HA_DOUBLE;
+ } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
+ Members = 1;
+ switch (Base) {
+ case HA_FLOAT:
+ case HA_DOUBLE:
+ return false;
+ case HA_VECT64:
+ return VT->getBitWidth() == 64;
+ case HA_VECT128:
+ return VT->getBitWidth() == 128;
+ case HA_UNKNOWN:
+ switch (VT->getBitWidth()) {
+ case 64:
+ Base = HA_VECT64;
+ return true;
+ case 128:
+ Base = HA_VECT128;
+ return true;
+ default:
+ return false;
+ }
+ }
+ }
+
+ return (Members > 0 && Members <= 4);
+}
+
+/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
+/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
+/// passing according to AAPCS rules.
+bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+ if (getEffectiveCallingConv(CallConv, isVarArg) !=
+ CallingConv::ARM_AAPCS_VFP)
+ return false;
+
+ HABaseType Base = HA_UNKNOWN;
+ uint64_t Members = 0;
+ bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
+ DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
+
+ bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
+ return IsHA || IsIntArray;
+}
+
+unsigned ARMTargetLowering::getExceptionPointerRegister(
+ const Constant *PersonalityFn) const {
+ // Platforms which do not use SjLj EH may return values in these registers
+ // via the personality function.
+ return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
+}
+
+unsigned ARMTargetLowering::getExceptionSelectorRegister(
+ const Constant *PersonalityFn) const {
+ // Platforms which do not use SjLj EH may return values in these registers
+ // via the personality function.
+ return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 0000000..b764624
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,677 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
+#define LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
+
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include <vector>
+
+namespace llvm {
+ class ARMConstantPoolValue;
+ class ARMSubtarget;
+
+ namespace ARMISD {
+ // ARM Specific DAG Nodes
+ enum NodeType : unsigned {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
+ // TargetExternalSymbol, and TargetGlobalAddress.
+ WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
+ // PIC mode.
+ WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
+
+ // Add pseudo op to model memcpy for struct byval.
+ COPY_STRUCT_BYVAL,
+
+ CALL, // Function call.
+ CALL_PRED, // Function call that's predicable.
+ CALL_NOLINK, // Function call with branch not branch-and-link.
+ tCALL, // Thumb function call.
+ BRCOND, // Conditional branch.
+ BR_JT, // Jumptable branch.
+ BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
+ RET_FLAG, // Return with a flag operand.
+ INTRET_FLAG, // Interrupt return with an LR-offset and a flag operand.
+
+ PIC_ADD, // Add with a PC operand and a PIC label.
+
+ CMP, // ARM compare instructions.
+ CMN, // ARM CMN instructions.
+ CMPZ, // ARM compare that sets only Z flag.
+ CMPFP, // ARM VFP compare instruction, sets FPSCR.
+ CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
+ FMSTAT, // ARM fmstat instruction.
+
+ CMOV, // ARM conditional move instructions.
+
+ BCC_i64,
+
+ SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+ SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+ RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+
+ ADDC, // Add with carry
+ ADDE, // Add using carry
+ SUBC, // Sub with carry
+ SUBE, // Sub using carry
+
+ VMOVRRD, // double to two gprs.
+ VMOVDRR, // Two gprs to double.
+
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.
+
+ TC_RETURN, // Tail call return pseudo.
+
+ THREAD_POINTER,
+
+ DYN_ALLOC, // Dynamic allocation on the stack.
+
+ MEMBARRIER_MCR, // Memory barrier (MCR)
+
+ PRELOAD, // Preload
+
+ WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
+ WIN__DBZCHK, // Windows' divide by zero check
+
+ VCEQ, // Vector compare equal.
+ VCEQZ, // Vector compare equal to zero.
+ VCGE, // Vector compare greater than or equal.
+ VCGEZ, // Vector compare greater than or equal to zero.
+ VCLEZ, // Vector compare less than or equal to zero.
+ VCGEU, // Vector compare unsigned greater than or equal.
+ VCGT, // Vector compare greater than.
+ VCGTZ, // Vector compare greater than zero.
+ VCLTZ, // Vector compare less than zero.
+ VCGTU, // Vector compare unsigned greater than.
+ VTST, // Vector test bits.
+
+ // Vector shift by immediate:
+ VSHL, // ...left
+ VSHRs, // ...right (signed)
+ VSHRu, // ...right (unsigned)
+
+ // Vector rounding shift by immediate:
+ VRSHRs, // ...right (signed)
+ VRSHRu, // ...right (unsigned)
+ VRSHRN, // ...right narrow
+
+ // Vector saturating shift by immediate:
+ VQSHLs, // ...left (signed)
+ VQSHLu, // ...left (unsigned)
+ VQSHLsu, // ...left (signed to unsigned)
+ VQSHRNs, // ...right narrow (signed)
+ VQSHRNu, // ...right narrow (unsigned)
+ VQSHRNsu, // ...right narrow (signed to unsigned)
+
+ // Vector saturating rounding shift by immediate:
+ VQRSHRNs, // ...right narrow (signed)
+ VQRSHRNu, // ...right narrow (unsigned)
+ VQRSHRNsu, // ...right narrow (signed to unsigned)
+
+ // Vector shift and insert:
+ VSLI, // ...left
+ VSRI, // ...right
+
+ // Vector get lane (VMOV scalar to ARM core register)
+ // (These are used for 8- and 16-bit element types only.)
+ VGETLANEu, // zero-extend vector extract element
+ VGETLANEs, // sign-extend vector extract element
+
+ // Vector move immediate and move negated immediate:
+ VMOVIMM,
+ VMVNIMM,
+
+ // Vector move f32 immediate:
+ VMOVFPIMM,
+
+ // Vector duplicate:
+ VDUP,
+ VDUPLANE,
+
+ // Vector shuffles:
+ VEXT, // extract
+ VREV64, // reverse elements within 64-bit doublewords
+ VREV32, // reverse elements within 32-bit words
+ VREV16, // reverse elements within 16-bit halfwords
+ VZIP, // zip (interleave)
+ VUZP, // unzip (deinterleave)
+ VTRN, // transpose
+ VTBL1, // 1-register shuffle with mask
+ VTBL2, // 2-register shuffle with mask
+
+ // Vector multiply long:
+ VMULLs, // ...signed
+ VMULLu, // ...unsigned
+
+ UMLAL, // 64bit Unsigned Accumulate Multiply
+ SMLAL, // 64bit Signed Accumulate Multiply
+
+ // Operands of the standard BUILD_VECTOR node are not legalized, which
+ // is fine if BUILD_VECTORs are always lowered to shuffles or other
+ // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+ // operands need to be legalized. Define an ARM-specific version of
+ // BUILD_VECTOR for this purpose.
+ BUILD_VECTOR,
+
+ // Bit-field insert
+ BFI,
+
+ // Vector OR with immediate
+ VORRIMM,
+ // Vector AND with NOT of immediate
+ VBICIMM,
+
+ // Vector bitwise select
+ VBSL,
+
+ // Pseudo-instruction representing a memory copy using ldm/stm
+ // instructions.
+ MEMCPY,
+
+ // Vector load N-element structure to all lanes:
+ VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ VLD3DUP,
+ VLD4DUP,
+
+ // NEON loads with post-increment base updates:
+ VLD1_UPD,
+ VLD2_UPD,
+ VLD3_UPD,
+ VLD4_UPD,
+ VLD2LN_UPD,
+ VLD3LN_UPD,
+ VLD4LN_UPD,
+ VLD2DUP_UPD,
+ VLD3DUP_UPD,
+ VLD4DUP_UPD,
+
+ // NEON stores with post-increment base updates:
+ VST1_UPD,
+ VST2_UPD,
+ VST3_UPD,
+ VST4_UPD,
+ VST2LN_UPD,
+ VST3LN_UPD,
+ VST4LN_UPD
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace ARM {
+ bool isBitFieldInvertedMask(unsigned v);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+ class ARMTargetLowering : public TargetLowering {
+ public:
+ explicit ARMTargetLowering(const TargetMachine &TM,
+ const ARMSubtarget &STI);
+
+ unsigned getJumpTableEncoding() const override;
+ bool useSoftFloat() const override;
+
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const override;
+
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ bool isSelectSupported(SelectSupportKind Kind) const override {
+ // ARM does not support scalar condition selects on vectors.
+ return (Kind != ScalarCondVectorVal);
+ }
+
+ /// getSetCCResultType - Return the value type to use for ISD::SETCC.
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
+
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const override;
+
+ void AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const override;
+
+ SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
+ SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
+ bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;
+
+ /// allowsMisalignedMemoryAccesses - Returns true if the target allows
+ /// unaligned memory accesses of the specified type. Returns whether it
+ /// is "fast" by reference in the second argument.
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
+ unsigned Align,
+ bool *Fast) const override;
+
+ EVT getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const override;
+
+ using TargetLowering::isZExtFree;
+ bool isZExtFree(SDValue Val, EVT VT2) const override;
+
+ bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
+
+ bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
+
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
+ bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+
+ /// isLegalAddImmediate - Return true if the specified immediate is legal
+ /// add immediate, that is the target has add instructions which can
+ /// add a register and the immediate without having to materialize
+ /// the immediate into a register.
+ bool isLegalAddImmediate(int64_t Imm) const override;
+
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as pre-indexed load / store address.
+ bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
+
+ /// getPostIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if this node can be
+ /// combined with a load / store to form a post-indexed load / store.
+ bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
+ SDValue &Offset, ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
+
+ void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const override;
+
+
+ bool ExpandInlineAsm(CallInst *CI) const override;
+
+ ConstraintType getConstraintType(StringRef Constraint) const override;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const override;
+
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraint of the inline asm instruction
+ /// being processed is 'm'.
+ void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const override;
+
+ unsigned
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
+ if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ else if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode.size() == 2) {
+ if (ConstraintCode[0] == 'U') {
+ switch(ConstraintCode[1]) {
+ default:
+ break;
+ case 'm':
+ return InlineAsm::Constraint_Um;
+ case 'n':
+ return InlineAsm::Constraint_Un;
+ case 'q':
+ return InlineAsm::Constraint_Uq;
+ case 's':
+ return InlineAsm::Constraint_Us;
+ case 't':
+ return InlineAsm::Constraint_Ut;
+ case 'v':
+ return InlineAsm::Constraint_Uv;
+ case 'y':
+ return InlineAsm::Constraint_Uy;
+ }
+ }
+ }
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
+ const ARMSubtarget* getSubtarget() const {
+ return Subtarget;
+ }
+
+ /// getRegClassFor - Return the register class that should be used for the
+ /// specified value type.
+ const TargetRegisterClass *getRegClassFor(MVT VT) const override;
+
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
+
+ bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const override;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const override;
+
+ Sched::Preference getSchedulingPreference(SDNode *N) const override;
+
+ bool
+ isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const override;
+
+ /// \brief Returns true if it is beneficial to convert a load of a constant
+ /// to just the constant itself.
+ bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const override;
+
+ /// \brief Returns true if an argument of type Ty needs to be passed in a
+ /// contiguous block of registers in calling convention CallConv.
+ bool functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
+ Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const;
+ Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const override;
+ Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr, AtomicOrdering Ord) const override;
+
+ void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
+
+ Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+ Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+
+ unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
+
+ bool lowerInterleavedLoad(LoadInst *LI,
+ ArrayRef<ShuffleVectorInst *> Shuffles,
+ ArrayRef<unsigned> Indices,
+ unsigned Factor) const override;
+ bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+ unsigned Factor) const override;
+
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+
+ bool useLoadStackGuardNode() const override;
+
+ bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+ unsigned &Cost) const override;
+
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
+
+ protected:
+ std::pair<const TargetRegisterClass *, uint8_t>
+ findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const override;
+
+ private:
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ const TargetRegisterInfo *RegInfo;
+
+ const InstrItineraryData *Itins;
+
+ /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
+ ///
+ unsigned ARMPCLabelIndex;
+
+ void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
+ void addDRTypeForNEON(MVT VT);
+ void addQRTypeForNEON(MVT VT);
+ std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const;
+
+ typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
+ void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVectorImpl<SDValue> &MemOpChains,
+ ISD::ArgFlagsTy Flags) const;
+ SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ SDLoc dl) const;
+
+ CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC,
+ bool isVarArg) const;
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
+ bool isVarArg) const;
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ SDLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+ SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ TLSModel::Model model) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
+ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+ void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
+ SmallVectorImpl<SDValue> &Results) const;
+ SDValue LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed,
+ SDValue &Chain) const;
+ SDValue LowerREM(SDNode *N, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
+
+ /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+ /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+ /// expanded to FMAs when this method returns true, otherwise fmuladd is
+ /// expanded to fmul + fadd.
+ ///
+ /// ARM supports both fused and unfused multiply-add operations; we already
+ /// lower a pair of fmul and fadd to the latter so it's not clear that there
+ /// would be a gain or that the gain would be worthwhile enough to risk
+ /// correctness bugs.
+ bool isFMAFasterThanFMulAndFAdd(EVT VT) const override { return false; }
+
+ SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ bool isThisReturn, SDValue ThisVal) const;
+
+ SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+ SDLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
+ int ArgOffset,
+ unsigned ArgSize) const;
+
+ void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ SDLoc dl, SDValue &Chain,
+ unsigned ArgOffset,
+ unsigned TotalArgRegsSaveSize,
+ bool ForceMutable = false) const;
+
+ SDValue
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ /// HandleByVal - Target-specific cleanup for ByVal support.
+ void HandleByVal(CCState *, unsigned &, unsigned) const override;
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
+ SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc dl, SelectionDAG &DAG) const override;
+
+ bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
+
+ bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+
+ SDValue getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
+ SDValue ARMcc, SDValue CCR, SDValue Cmp,
+ SelectionDAG &DAG) const;
+ SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const;
+ SDValue getVFPCmp(SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, SDLoc dl) const;
+ SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
+
+ SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
+
+ void SetupEntryBlockForSjLj(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB, int FI) const;
+
+ void EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+ MachineBasicBlock *EmitLowered__dbzchk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+ };
+
+ enum NEONModImmType {
+ VMOVModImm,
+ VMVNModImm,
+ OtherModImm
+ };
+
+ namespace ARM {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
+ }
+}
+
+#endif // ARMISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
new file mode 100644
index 0000000..e79608d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -0,0 +1,2372 @@
+//===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// ARM Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>;
+def MulFrm : Format<1>;
+def BrFrm : Format<2>;
+def BrMiscFrm : Format<3>;
+
+def DPFrm : Format<4>;
+def DPSoRegRegFrm : Format<5>;
+
+def LdFrm : Format<6>;
+def StFrm : Format<7>;
+def LdMiscFrm : Format<8>;
+def StMiscFrm : Format<9>;
+def LdStMulFrm : Format<10>;
+
+def LdStExFrm : Format<11>;
+
+def ArithMiscFrm : Format<12>;
+def SatFrm : Format<13>;
+def ExtFrm : Format<14>;
+
+def VFPUnaryFrm : Format<15>;
+def VFPBinaryFrm : Format<16>;
+def VFPConv1Frm : Format<17>;
+def VFPConv2Frm : Format<18>;
+def VFPConv3Frm : Format<19>;
+def VFPConv4Frm : Format<20>;
+def VFPConv5Frm : Format<21>;
+def VFPLdStFrm : Format<22>;
+def VFPLdStMulFrm : Format<23>;
+def VFPMiscFrm : Format<24>;
+
+def ThumbFrm : Format<25>;
+def MiscFrm : Format<26>;
+
+def NGetLnFrm : Format<27>;
+def NSetLnFrm : Format<28>;
+def NDupFrm : Format<29>;
+def NLdStFrm : Format<30>;
+def N1RegModImmFrm: Format<31>;
+def N2RegFrm : Format<32>;
+def NVCVTFrm : Format<33>;
+def NVDupLnFrm : Format<34>;
+def N2RegVShLFrm : Format<35>;
+def N2RegVShRFrm : Format<36>;
+def N3RegFrm : Format<37>;
+def N3RegVShFrm : Format<38>;
+def NVExtFrm : Format<39>;
+def NVMulSLFrm : Format<40>;
+def NVTBLFrm : Format<41>;
+def DPSoRegImmFrm : Format<42>;
+
+// Misc flags.
+
+// The instruction has an Rn register operand.
+// UnaryDP - Indicates this is a unary data processing instruction, i.e.
+// it doesn't have a Rn operand.
+class UnaryDP { bit isUnaryDataProc = 1; }
+
+// Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+// a 16-bit Thumb instruction if certain conditions are met.
+class Xform16Bit { bit canXformTo16Bit = 1; }
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction flags. These need to match ARMBaseInstrInfo.h.
+//
+
+// FIXME: Once the JIT is MC-ized, these can go away.
+// Addressing mode.
+class AddrMode<bits<5> val> {
+ bits<5> Value = val;
+}
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrMode6 : AddrMode<6>;
+def AddrModeT1_1 : AddrMode<7>;
+def AddrModeT1_2 : AddrMode<8>;
+def AddrModeT1_4 : AddrMode<9>;
+def AddrModeT1_s : AddrMode<10>;
+def AddrModeT2_i12 : AddrMode<11>;
+def AddrModeT2_i8 : AddrMode<12>;
+def AddrModeT2_so : AddrMode<13>;
+def AddrModeT2_pc : AddrMode<14>;
+def AddrModeT2_i8s4 : AddrMode<15>;
+def AddrMode_i12 : AddrMode<16>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+ bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+def IndexModeUpd : IndexMode<3>;
+
+// Instruction execution domain.
+class Domain<bits<3> val> {
+ bits<3> Value = val;
+}
+def GenericDomain : Domain<0>;
+def VFPDomain : Domain<1>; // Instructions in VFP domain only
+def NeonDomain : Domain<2>; // Instructions in Neon domain only
+def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
+
+//===----------------------------------------------------------------------===//
+// ARM special operands.
+//
+
+// ARM imod and iflag operands, used only by the CPS instruction.
+def imod_op : Operand<i32> {
+ let PrintMethod = "printCPSIMod";
+}
+
+def ProcIFlagsOperand : AsmOperandClass {
+ let Name = "ProcIFlags";
+ let ParserMethod = "parseProcIFlagsOperand";
+}
+def iflags_op : Operand<i32> {
+ let PrintMethod = "printCPSIFlag";
+ let ParserMatchClass = ProcIFlagsOperand;
+}
+
+// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
+// register whose default is 0 (no register).
+def CondCodeOperand : AsmOperandClass { let Name = "CondCode"; }
+def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm),
+ (ops (i32 14), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+ let ParserMatchClass = CondCodeOperand;
+ let DecoderMethod = "DecodePredicateOperand";
+}
+
+// Selectable predicate operand for CMOV instructions. We can't use a normal
+// predicate because the default values interfere with instruction selection. In
+// all other respects it is identical though: pseudo-instruction expansion
+// relies on the MachineOperands being compatible.
+def cmovpred : Operand<i32>, PredicateOp,
+ ComplexPattern<i32, 2, "SelectCMOVPred"> {
+ let MIOperandInfo = (ops i32imm, i32imm);
+ let PrintMethod = "printPredicateOperand";
+}
+
+// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+def CCOutOperand : AsmOperandClass { let Name = "CCOut"; }
+def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+ let EncoderMethod = "getCCOutOpValue";
+ let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
+ let DecoderMethod = "DecodeCCOutOperand";
+}
+
+// Same as cc_out except it defaults to setting CPSR.
+def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
+ let EncoderMethod = "getCCOutOpValue";
+ let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
+ let DecoderMethod = "DecodeCCOutOperand";
+}
+
+// ARM special operands for disassembly only.
+//
+def SetEndAsmOperand : ImmAsmOperand {
+ let Name = "SetEndImm";
+ let ParserMethod = "parseSetEndImm";
+}
+def setend_op : Operand<i32> {
+ let PrintMethod = "printSetendOperand";
+ let ParserMatchClass = SetEndAsmOperand;
+}
+
+def MSRMaskOperand : AsmOperandClass {
+ let Name = "MSRMask";
+ let ParserMethod = "parseMSRMaskOperand";
+}
+def msr_mask : Operand<i32> {
+ let PrintMethod = "printMSRMaskOperand";
+ let DecoderMethod = "DecodeMSRMask";
+ let ParserMatchClass = MSRMaskOperand;
+}
+
+def BankedRegOperand : AsmOperandClass {
+ let Name = "BankedReg";
+ let ParserMethod = "parseBankedRegOperand";
+}
+def banked_reg : Operand<i32> {
+ let PrintMethod = "printBankedRegOperand";
+ let DecoderMethod = "DecodeBankedReg";
+ let ParserMatchClass = BankedRegOperand;
+}
+
+// Shift Right Immediate - A shift right immediate is encoded differently from
+// other shift immediates. The imm6 field is encoded like so:
+//
+// Offset Encoding
+// 8 imm6<5:3> = '001', 8 - <imm> is encoded in imm6<2:0>
+// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
+// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
+// 64 64 - <imm> is encoded in imm6<5:0>
+def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
+def shr_imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 8; }]> {
+ let EncoderMethod = "getShiftRight8Imm";
+ let DecoderMethod = "DecodeShiftRight8Imm";
+ let ParserMatchClass = shr_imm8_asm_operand;
+}
+def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
+def shr_imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 16; }]> {
+ let EncoderMethod = "getShiftRight16Imm";
+ let DecoderMethod = "DecodeShiftRight16Imm";
+ let ParserMatchClass = shr_imm16_asm_operand;
+}
+def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
+def shr_imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]> {
+ let EncoderMethod = "getShiftRight32Imm";
+ let DecoderMethod = "DecodeShiftRight32Imm";
+ let ParserMatchClass = shr_imm32_asm_operand;
+}
+def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
+def shr_imm64 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 64; }]> {
+ let EncoderMethod = "getShiftRight64Imm";
+ let DecoderMethod = "DecodeShiftRight64Imm";
+ let ParserMatchClass = shr_imm64_asm_operand;
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Assembler alias templates.
+//
+class ARMInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsARM]>;
+class tInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsThumb]>;
+class t2InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>;
+class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
+class VFP2DPInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2,HasDPVFP]>;
+class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
+class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>;
+
+
+class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasVFP2]>;
+class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasNEON]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction templates.
+//
+
+
+class InstTemplate<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : Instruction {
+ let Namespace = "ARM";
+
+ AddrMode AM = am;
+ int Size = sz;
+ IndexMode IM = im;
+ bits<2> IndexModeBits = IM.Value;
+ Format F = f;
+ bits<6> Form = F.Value;
+ Domain D = d;
+ bit isUnaryDataProc = 0;
+ bit canXformTo16Bit = 0;
+ // The instruction is a 16-bit flag setting Thumb instruction. Used
+ // by the parser to determine whether to require the 'S' suffix on the
+ // mnemonic (when not in an IT block) or preclude it (when in an IT block).
+ bit thumbArithFlagSetting = 0;
+
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
+ // The layout of TSFlags should be kept in sync with ARMBaseInfo.h.
+ let TSFlags{4-0} = AM.Value;
+ let TSFlags{6-5} = IndexModeBits;
+ let TSFlags{12-7} = Form;
+ let TSFlags{13} = isUnaryDataProc;
+ let TSFlags{14} = canXformTo16Bit;
+ let TSFlags{17-15} = D.Value;
+ let TSFlags{18} = thumbArithFlagSetting;
+
+ let Constraints = cstr;
+ let Itinerary = itin;
+}
+
+class Encoding {
+ field bits<32> Inst;
+ // Mask of bits that cause an encoding to be UNPREDICTABLE.
+ // If a bit is set, then if the corresponding bit in the
+ // target encoding differs from its value in the "Inst" field,
+ // the instruction is UNPREDICTABLE (SoftFail in abstract parlance).
+ field bits<32> Unpredictable = 0;
+ // SoftFail is the generic name for this field, but we alias it so
+ // as to make it more obvious what it means in ARM-land.
+ field bits<32> SoftFail = Unpredictable;
+}
+
+class InstARM<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding {
+ let DecoderNamespace = "ARM";
+}
+
+// This Encoding-less class is used by Thumb1 to specify the encoding bits later
+// on by adding flavors to specific instructions.
+class InstThumb<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : InstTemplate<am, sz, im, f, d, cstr, itin> {
+ let DecoderNamespace = "Thumb";
+}
+
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class AsmPseudoInst<string asm, dag iops, dag oops = (outs)>
+ : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
+ "", NoItinerary> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let Pattern = [];
+ let isCodeGenOnly = 0; // So we get asm matcher for it.
+ let AsmString = asm;
+ let isPseudo = 1;
+}
+
+class ARMAsmPseudo<string asm, dag iops, dag oops = (outs)>
+ : AsmPseudoInst<asm, iops, oops>, Requires<[IsARM]>;
+class tAsmPseudo<string asm, dag iops, dag oops = (outs)>
+ : AsmPseudoInst<asm, iops, oops>, Requires<[IsThumb]>;
+class t2AsmPseudo<string asm, dag iops, dag oops = (outs)>
+ : AsmPseudoInst<asm, iops, oops>, Requires<[IsThumb2]>;
+class VFP2AsmPseudo<string asm, dag iops, dag oops = (outs)>
+ : AsmPseudoInst<asm, iops, oops>, Requires<[HasVFP2]>;
+class NEONAsmPseudo<string asm, dag iops, dag oops = (outs)>
+ : AsmPseudoInst<asm, iops, oops>, Requires<[HasNEON]>;
+
+// Pseudo instructions for the code generator.
+class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
+ : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo,
+ GenericDomain, "", itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let Pattern = pattern;
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+// PseudoInst that's ARM-mode only.
+class ARMPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// PseudoInst that's Thumb-mode only.
+class tPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// PseudoInst that's Thumb2-mode only.
+class t2PseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+class ARMPseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : ARMPseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+class tPseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : tPseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+class t2PseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : t2PseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+// Almost all ARM instructions are predicable.
+class I<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// A few are not predicable
+class InoP<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = !strconcat(opc, asm);
+ let Pattern = pattern;
+ let isPredicable = 0;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Same as I except it can optionally modify CPSR. Note it's modeled as an input
+// operand since by default it's a zero register. It will become an implicit def
+// once it's "flipped".
+class sI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p; // Predicate operand
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{31-28} = p;
+ let Inst{20} = s;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Special cases
+class XI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+class AI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AsI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ asm, "", pattern>;
+class AXIM<dag oops, dag iops, AddrMode am, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, am, 4, IndexModeNone, f, itin,
+ asm, "", pattern>;
+class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : InoP<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+
+// Ctrl flow instructions
+class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
+ asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+
+// BR_JT instructions
+class JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 0, IndexModeNone, BrMiscFrm, itin,
+ asm, "", pattern>;
+
+class AIldr_ex_or_acq<bits<2> opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-10} = 0b11;
+ let Inst{9-8} = opcod2;
+ let Inst{7-0} = 0b10011111;
+}
+class AIstr_ex_or_rel<bits<2> opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0;
+ let Inst{19-16} = addr;
+ let Inst{11-10} = 0b11;
+ let Inst{9-8} = opcod2;
+ let Inst{7-4} = 0b1001;
+ let Inst{3-0} = Rt;
+}
+// Atomic load/store instructions
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIldr_ex_or_acq<opcod, 0b11, oops, iops, itin, opc, asm, pattern>;
+
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIstr_ex_or_rel<opcod, 0b11, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{15-12} = Rd;
+}
+
+// Exclusive load/store instructions
+
+class AIldaex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIldr_ex_or_acq<opcod, 0b10, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]>;
+
+class AIstlex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIstr_ex_or_rel<opcod, 0b10, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]> {
+ bits<4> Rd;
+ let Inst{15-12} = Rd;
+}
+
+class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
+ : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, $addr", pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> addr;
+ let Inst{27-23} = 0b00010;
+ let Inst{22} = b;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-4} = 0b00001001;
+ let Inst{3-0} = Rt2;
+
+ let Unpredictable{11-8} = 0b1111;
+ let DecoderMethod = "DecodeSwap";
+}
+// Acquire/Release load/store instructions
+class AIldracq<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIldr_ex_or_acq<opcod, 0b00, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]>;
+
+class AIstrrel<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIstr_ex_or_rel<opcod, 0b00, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]> {
+ let Inst{15-12} = 0b1111;
+}
+
+// addrmode1 instructions
+class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+
+// loads
+
+// LDR/LDRB/STR/STRB/...
+class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
+ Format f, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : I<oops, iops, am, 4, IndexModeNone, f, itin, opc, asm,
+ "", pattern> {
+ let Inst{27-25} = op;
+ let Inst{24} = 1; // 24 == P
+ // 23 == U
+ let Inst{22} = isByte;
+ let Inst{21} = 0; // 21 == W
+ let Inst{20} = isLd;
+}
+// Indexed load/stores
+class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, 4, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
+ let Inst{27-26} = 0b01;
+ let Inst{24} = isPre; // P bit
+ let Inst{22} = isByte; // B bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = isLd; // L bit
+ let Inst{15-12} = Rt;
+}
+class AI2stridx_reg<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+}
+
+class AI2stridx_imm<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = offset{11-0};
+}
+
+
+// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB
+// but for now use this class for STRT and STRBT.
+class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+}
+
+// addrmode3 instructions
+class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+class AI3ldstidx<bits<4> op, bit op20, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = isPre; // P bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{15-12} = Rt; // Rt
+ let Inst{7-4} = op;
+}
+
+// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB
+// but for now use this class for LDRSBT, LDRHT, LDSHT.
+class AI3ldstidxT<bits<4> op, bit isLoad, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, im, f, itin, opc, asm, cstr, pattern> {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 0; // P bit
+ let Inst{21} = 1;
+ let Inst{20} = isLoad; // L bit
+ let Inst{19-16} = addr; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{7-4} = op;
+}
+
+// stores
+class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = 0; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+// addrmode4 instructions
+class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, 4, im, f, itin, asm, cstr, pattern> {
+ bits<4> p;
+ bits<16> regs;
+ bits<4> Rn;
+ let Inst{31-28} = p;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 0; // S bit
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+}
+
+// Unsigned multiply, multiply-accumulate instructions.
+class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{20} = 0; // S bit
+ let Inst{27-21} = opcod;
+}
+class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{27-21} = opcod;
+}
+
+// Most significant word multiply
+class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{7-4} = opc7_4;
+ let Inst{20} = 1;
+ let Inst{27-21} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+// MSW multiple w/ Ra operand
+class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMul2I<opcod, opc7_4, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
+class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{4} = 0;
+ let Inst{7} = 1;
+ let Inst{20} = 0;
+ let Inst{27-21} = opcod;
+ let Inst{6-5} = bit6_5;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulxyI<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
+}
+
+// AMulxyI with Ra operand
+class AMulxyIa<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyI<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+// SMLAL*
+class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+}
+
+// Extend instructions.
+class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ExtFrm, itin,
+ opc, asm, "", pattern> {
+ // All AExtI instructions have Rd and Rm register operands.
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{7-4} = 0b0111;
+ let Inst{9-8} = 0b00;
+ let Inst{27-20} = opcod;
+
+ let Unpredictable{9-8} = 0b11;
+}
+
+// Misc Arithmetic instructions.
+class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+// Division instructions.
+class ADivA1I<bits<3> opcod, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-23} = 0b01110;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = Rm;
+ let Inst{7-4} = 0b0001;
+ let Inst{3-0} = Rn;
+}
+
+// PKH instructions
+def PKHLSLAsmOperand : ImmAsmOperand {
+ let Name = "PKHLSLImm";
+ let ParserMethod = "parsePKHLSLImm";
+}
+def pkh_lsl_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]>{
+ let PrintMethod = "printPKHLSLShiftImm";
+ let ParserMatchClass = PKHLSLAsmOperand;
+}
+def PKHASRAsmOperand : AsmOperandClass {
+ let Name = "PKHASRImm";
+ let ParserMethod = "parsePKHASRImm";
+}
+def pkh_asr_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]>{
+ let PrintMethod = "printPKHASRShiftImm";
+ let ParserMatchClass = PKHASRAsmOperand;
+}
+
+class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<5> sh;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh;
+ let Inst{6} = tb;
+ let Inst{5-4} = 0b01;
+ let Inst{3-0} = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+
+// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode.
+class ARMPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM];
+}
+class ARMV5TPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5T];
+}
+class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE];
+}
+// ARMV5MOPat - Same as ARMV5TEPat with UseMulOps.
+class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE, UseMulOps];
+}
+class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV6];
+}
+class VFPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasVFP2];
+}
+class VFPNoNEONPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasVFP2, DontUseNEONForFP];
+}
+//===----------------------------------------------------------------------===//
+// Thumb Instruction Format Definitions.
+//
+
+class ThumbI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// TI - Thumb instruction.
+class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
+
+// Two-address instructions
+class TIt<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "$lhs = $dst",
+ pattern>;
+
+// tBL, tBX 32-bit instructions
+class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
+ dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>,
+ Encoding {
+ let Inst{31-27} = opcod1;
+ let Inst{15-14} = opcod2;
+ let Inst{12} = opcod3;
+}
+
+// BR_JT instructions
+class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
+
+// Thumb1 only
+class Thumb1I<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1I<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
+class T1Ix2<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
+
+// Two-address instructions
+class T1It<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 2, itin,
+ asm, cstr, pattern>;
+
+// Thumb1 instruction that can either be predicated or set CPSR.
+class Thumb1sI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = !con(oops, (outs s_cc_out:$s));
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ let thumbArithFlagSetting = 1;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ let DecoderNamespace = "ThumbSBit";
+}
+
+class T1sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1sIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm,
+ "$Rn = $Rdn", pattern>;
+
+// Thumb1 instruction that can be predicated.
+class Thumb1pI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1pI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1pIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm,
+ "$Rn = $Rdn", pattern>;
+
+class T1pIs<dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeT1_s, 2, itin, opc, asm, "", pattern>;
+
+class Encoding16 : Encoding {
+ let Inst{31-16} = 0x0000;
+}
+
+// A6.2 16-bit Thumb instruction encoding
+class T1Encoding<bits<6> opcode> : Encoding16 {
+ let Inst{15-10} = opcode;
+}
+
+// A6.2.1 Shift (immediate), add, subtract, move, and compare encoding.
+class T1General<bits<5> opcode> : Encoding16 {
+ let Inst{15-14} = 0b00;
+ let Inst{13-9} = opcode;
+}
+
+// A6.2.2 Data-processing encoding.
+class T1DataProcessing<bits<4> opcode> : Encoding16 {
+ let Inst{15-10} = 0b010000;
+ let Inst{9-6} = opcode;
+}
+
+// A6.2.3 Special data instructions and branch and exchange encoding.
+class T1Special<bits<4> opcode> : Encoding16 {
+ let Inst{15-10} = 0b010001;
+ let Inst{9-6} = opcode;
+}
+
+// A6.2.4 Load/store single data item encoding.
+class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
+ let Inst{15-12} = opA;
+ let Inst{11-9} = opB;
+}
+class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+
+class T1BranchCond<bits<4> opcode> : Encoding16 {
+ let Inst{15-12} = opcode;
+}
+
+// Helper classes to encode Thumb1 loads and stores. For immediates, the
+// following bits are used for "opA" (see A6.2.4):
+//
+// 0b0110 => Immediate, 4 bytes
+// 0b1000 => Immediate, 2 bytes
+// 0b0111 => Immediate, 1 byte
+class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
+ T1LoadStore<0b0101, opcode> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{8-6} = addr{5-3}; // Rm
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
+class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
+ T1LoadStore<opA, {opB,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-6} = addr{7-3}; // imm5
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
+
+// A6.2.5 Miscellaneous 16-bit instructions encoding.
+class T1Misc<bits<7> opcode> : Encoding16 {
+ let Inst{15-12} = 0b1011;
+ let Inst{11-5} = opcode;
+}
+
+// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable.
+class Thumb2I<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+}
+
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+//
+// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
+// more consistent.
+class Thumb2sI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{20} = s;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+}
+
+// Special cases
+class Thumb2XI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+}
+
+class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ let DecoderNamespace = "Thumb";
+}
+
+class T2I<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
+class T2Ii12<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>;
+class T2Ii8<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8, 4, itin, opc, asm, "", pattern>;
+class T2Iso<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_so, 4, itin, opc, asm, "", pattern>;
+class T2Ipc<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_pc, 4, itin, opc, asm, "", pattern>;
+class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr,
+ pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<13> addr;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24} = P;
+ let Inst{23} = addr{8};
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = isLoad;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = Rt2{3-0};
+ let Inst{7-0} = addr{7-0};
+}
+class T2Ii8s4post<bit P, bit W, bit isLoad, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr,
+ pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> addr;
+ bits<9> imm;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24} = P;
+ let Inst{23} = imm{8};
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = isLoad;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = Rt2{3-0};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2sI<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
+
+class T2XI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
+class T2JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
+
+// Move to/from coprocessor instructions
+class T2Cop<bits<4> opc, dag oops, dag iops, string opcstr, string asm,
+ list<dag> pattern>
+ : T2I <oops, iops, NoItinerary, opcstr, asm, pattern>, Requires<[IsThumb2]> {
+ let Inst{31-28} = opc;
+}
+
+// Two-address instructions
+class T2XIt<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, cstr, pattern>;
+
+// T2Ipreldst - Thumb2 pre-indexed load / store instructions.
+class T2Ipreldst<bit signed, bits<2> opcod, bit load, bit pre,
+ dag oops, dag iops,
+ AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11} = 1;
+ // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
+ let Inst{10} = pre; // The P bit.
+ let Inst{9} = addr{8}; // Sign bit
+ let Inst{8} = 1; // The W bit.
+ let Inst{7-0} = addr{7-0};
+
+ let DecoderMethod = "DecodeT2LdStPre";
+}
+
+// T2Ipostldst - Thumb2 post-indexed load / store instructions.
+class T2Ipostldst<bit signed, bits<2> opcod, bit load, bit pre,
+ dag oops, dag iops,
+ AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+
+ bits<4> Rt;
+ bits<4> Rn;
+ bits<9> offset;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = load;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11} = 1;
+ // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
+ let Inst{10} = pre; // The P bit.
+ let Inst{9} = offset{8}; // Sign bit
+ let Inst{8} = 1; // The W bit.
+ let Inst{7-0} = offset{7-0};
+
+ let DecoderMethod = "DecodeT2LdStPre";
+}
+
+// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
+class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only, HasV5T];
+}
+
+// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode.
+class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+// T2v6Pat - Same as Pat<>, but requires V6T2 Thumb2 mode.
+class T2v6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, HasV6T2];
+}
+
+// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
+class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM VFP Instruction templates.
+//
+
+// Almost all VFP instructions are predicable.
+class VFPI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+ let DecoderNamespace = "VFP";
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Special cases
+class VFPXI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+ let DecoderNamespace = "VFP";
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+}
+
+// ARM VFP addrmode5 loads and stores
+class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Dd{4};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Dd{3-0};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
+
+class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Sd{0};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Sd{4-1};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
+
+// VFP Load / store multiple pseudo instructions.
+class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrMode4, 4, IndexModeNone, Pseudo, VFPNeonDomain,
+ cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Load / store multiple
+
+// Unknown precision
+class AXXI4<dag oops, dag iops, IndexMode im,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStFrm, NoItinerary, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = 0;
+ let Inst{15-12} = regs{11-8};
+ let Inst{7-1} = regs{7-1};
+
+ let Inst{27-25} = 0b110;
+ let Inst{11-8} = 0b1011;
+ let Inst{0} = 1;
+}
+
+// Double precision
+class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{12};
+ let Inst{15-12} = regs{11-8};
+ let Inst{7-1} = regs{7-1};
+
+ let Inst{27-25} = 0b110;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{0} = 0;
+}
+
+// Single Precision
+class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{8};
+ let Inst{15-12} = regs{12-9};
+ let Inst{7-0} = regs{7-0};
+
+ let Inst{27-25} = 0b110;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+}
+
+// Double precision, unary
+class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+
+ let Predicates = [HasVFP2, HasDPVFP];
+}
+
+// Double precision, unary, not-predicated
+class ADuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, VFPUnaryFrm, itin, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dm;
+
+ let Inst{31-28} = 0b1111;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Double precision, binary
+class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dn;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{19-16} = Dn{3-0};
+ let Inst{7} = Dn{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ let Predicates = [HasVFP2, HasDPVFP];
+}
+
+// FP, binary, not predicated
+class ADbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
+ InstrItinClass itin, string asm, list<dag> pattern>
+ : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, VFPBinaryFrm, itin,
+ asm, "", pattern>
+{
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dn;
+ bits<5> Dm;
+
+ let Inst{31-28} = 0b1111;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{19-16} = Dn{3-0};
+ let Inst{7} = Dn{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // double precision
+ let Inst{6} = opcod3;
+ let Inst{4} = 0;
+
+ let Predicates = [HasVFP2, HasDPVFP];
+}
+
+// Single precision, unary, predicated
+class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Single precision, unary, non-predicated
+class ASuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone,
+ VFPUnaryFrm, itin, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ let Inst{31-28} = 0b1111;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Single precision unary, if no NEON. Same as ASuI except not available if
+// NEON is enabled.
+class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc, asm,
+ pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
+// Single precision, binary
+class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+// Single precision, binary, not predicated
+class ASbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
+ InstrItinClass itin, string asm, list<dag> pattern>
+ : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone,
+ VFPBinaryFrm, itin, asm, "", pattern>
+{
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ let Inst{31-28} = 0b1111;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{6} = opcod3;
+ let Inst{4} = 0;
+}
+
+// Single precision binary, if no NEON. Same as ASbI except not available if
+// NEON is enabled.
+class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+// VFP conversion instructions
+class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> {
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-8} = opcod4;
+ let Inst{6} = 1;
+ let Inst{4} = 0;
+}
+
+// VFP conversion between floating-point and fixed-point
+class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> {
+ bits<5> fbits;
+ // size (fixed-point number): sx == 0 ? 16 : 32
+ let Inst{7} = op5; // sx
+ let Inst{5} = fbits{0};
+ let Inst{3-0} = fbits{4-1};
+}
+
+// VFP conversion instructions, if no NEON
+class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
+class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, f, itin, opc, asm, pattern> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{4} = 1;
+}
+
+class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, itin, opc, asm, pattern>;
+
+class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, itin, opc, asm, pattern>;
+
+class AVConv4I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv4Frm, itin, opc, asm, pattern>;
+
+class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv5Frm, itin, opc, asm, pattern>;
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM NEON Instruction templates.
+//
+
+class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+ let DecoderNamespace = "NEON";
+}
+
+// Same as NeonI except it does not have a "data type" specifier.
+class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+ let DecoderNamespace = "NEON";
+}
+
+// Same as NeonI except it is not predicated
+class NeonInp<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = !strconcat(opc, ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+ let DecoderNamespace = "NEON";
+
+ let Inst{31-28} = 0b1111;
+}
+
+class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
+ cstr, pattern> {
+ let Inst{31-24} = 0b11110100;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{7-4} = op7_4;
+
+ let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder";
+ let DecoderNamespace = "NEONLoadStore";
+
+ bits<5> Vd;
+ bits<6> Rn;
+ bits<4> Rm;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{19-16} = Rn{3-0};
+ let Inst{3-0} = Rm{3-0};
+}
+
+class NLdStLn<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NLdSt<op23, op21_20, op11_8, op7_4, oops, iops, itin, opc,
+ dt, asm, cstr, pattern> {
+ bits<3> lane;
+}
+
+class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
+ : InstARM<AddrMode6, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrModeNone, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
+ pattern> {
+ let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+ let DecoderNamespace = "NEONData";
+}
+
+class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NeonXI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, asm,
+ cstr, pattern> {
+ let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+ let DecoderNamespace = "NEONData";
+}
+
+// NEON "one register and a modified immediate" format.
+class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
+ bit op5, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{23} = op23;
+ let Inst{21-19} = op21_19;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{5} = op5;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<13> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{24} = SIMM{7};
+ let Inst{18-16} = SIMM{6-4};
+ let Inst{3-0} = SIMM{3-0};
+ let DecoderMethod = "DecodeNEONModImmInstruction";
+}
+
+// NEON 2 vector register format.
+class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, N2RegFrm, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24-23} = op24_23;
+ let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// Same as N2V but not predicated.
+class N2Vnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+ dag oops, dag iops, InstrItinClass itin, string OpcodeStr,
+ string Dt, list<dag> pattern>
+ : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N2RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vm", "", pattern> {
+ bits<5> Vd;
+ bits<5> Vm;
+
+ // Encode instruction operands
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
+
+ // Encode constant bits
+ let Inst{27-23} = 0b00111;
+ let Inst{21-20} = 0b11;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11} = 0;
+ let Inst{10-8} = op10_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
+
+ let DecoderNamespace = "NEON";
+}
+
+// Same as N2V except it doesn't have a datatype suffix.
+class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, N2RegFrm, itin, opc, asm, cstr, pattern> {
+ let Inst{24-23} = op24_23;
+ let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// NEON 2 vector register with immediate.
+class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<6> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{21-16} = SIMM{5-0};
+}
+
+// NEON 3 vector register format.
+
+class N3VCommon<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+class N3Vnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops,Format f, InstrItinClass itin,
+ string OpcodeStr, string Dt, list<dag> pattern>
+ : NeonInp<oops, iops, AddrModeNone, IndexModeNone, f, itin, OpcodeStr,
+ Dt, "$Vd, $Vn, $Vm", "", pattern> {
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ // Encode instruction operands
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
+
+ // Encode constant bits
+ let Inst{27-23} = op27_23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+class N3VLane32<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ bit lane;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = lane;
+}
+
+class N3VLane16<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ bits<2> lane;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{2-0} = Vm{2-0};
+ let Inst{5} = lane{1};
+ let Inst{3} = lane{0};
+}
+
+// Same as N3V except it doesn't have a data type suffix.
+class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// NEON VMOVs between scalar and core registers.
+class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : InstARM<AddrModeNone, 4, IndexModeNone, f, NeonDomain,
+ "", itin> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{6-5} = opcod3;
+ let Inst{4} = 1;
+ // A8.6.303, A8.6.328, A8.6.329
+ let Inst{3-0} = 0b0000;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+
+ let PostEncoderMethod = "NEONThumb2DupPostEncoder";
+ let DecoderNamespace = "NEONDup";
+
+ bits<5> V;
+ bits<4> R;
+ bits<4> p;
+ bits<4> lane;
+
+ let Inst{31-28} = p{3-0};
+ let Inst{7} = V{4};
+ let Inst{19-16} = V{3-0};
+ let Inst{15-12} = R{3-0};
+}
+class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NGetLnFrm, itin,
+ opc, dt, asm, pattern>;
+class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NSetLnFrm, itin,
+ opc, dt, asm, pattern>;
+class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NDupFrm, itin,
+ opc, dt, asm, pattern>;
+
+// Vector Duplicate Lane (from scalar to all elements)
+class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
+ InstrItinClass itin, string opc, string dt, string asm,
+ list<dag> pattern>
+ : NDataI<oops, iops, NVDupLnFrm, itin, opc, dt, asm, "", pattern> {
+ let Inst{24-23} = 0b11;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = op19_16;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
+
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
+}
+
+// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
+// for single-precision FP.
+class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasNEON,UseNEONForFP];
+}
+
+// VFP/NEON Instruction aliases for type suffices.
+class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> :
+ InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>;
+
+multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> {
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+
+multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result> {
+ let Predicates = [HasNEON] in {
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+}
+
+// The same alias classes using AsmPseudo instead, for the more complex
+// stuff in NEON that InstAlias can't quite handle.
+// Note that we can't use anonymous defm references here like we can
+// above, as we care about the ultimate instruction enum names generated, unlike
+// for instalias defs.
+class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> :
+ AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>;
+
+// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM.
+def : TokenAlias<".s8", ".i8">;
+def : TokenAlias<".u8", ".i8">;
+def : TokenAlias<".s16", ".i16">;
+def : TokenAlias<".u16", ".i16">;
+def : TokenAlias<".s32", ".i32">;
+def : TokenAlias<".u32", ".i32">;
+def : TokenAlias<".s64", ".i64">;
+def : TokenAlias<".u64", ".i64">;
+
+def : TokenAlias<".i8", ".8">;
+def : TokenAlias<".i16", ".16">;
+def : TokenAlias<".i32", ".32">;
+def : TokenAlias<".i64", ".64">;
+
+def : TokenAlias<".p8", ".8">;
+def : TokenAlias<".p16", ".16">;
+
+def : TokenAlias<".f32", ".32">;
+def : TokenAlias<".f64", ".64">;
+def : TokenAlias<".f", ".f32">;
+def : TokenAlias<".d", ".f64">;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
new file mode 100644
index 0000000..cf973d6
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -0,0 +1,134 @@
+//===-- ARMInstrInfo.cpp - ARM Instruction Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstrInfo.h"
+#include "ARM.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+using namespace llvm;
+
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI() {}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ if (hasNOP()) {
+ NopInst.setOpcode(ARM::HINT);
+ NopInst.addOperand(MCOperand::createImm(0));
+ NopInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::createReg(0));
+ } else {
+ NopInst.setOpcode(ARM::MOVr);
+ NopInst.addOperand(MCOperand::createReg(ARM::R0));
+ NopInst.addOperand(MCOperand::createReg(ARM::R0));
+ NopInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::createReg(0));
+ NopInst.addOperand(MCOperand::createReg(0));
+ }
+}
+
+unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ switch (Opc) {
+ default:
+ break;
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDR_PRE_REG:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
+ return ARM::LDRi12;
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ return ARM::LDRH;
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDRB_PRE_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ return ARM::LDRBi12;
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ return ARM::LDRSH;
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ return ARM::LDRSB;
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ return ARM::STRi12;
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ return ARM::STRH;
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRB_PRE_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ return ARM::STRBi12;
+ }
+
+ return 0;
+}
+
+void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
+
+ if (!Subtarget.useMovt(MF)) {
+ if (RM == Reloc::PIC_)
+ expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12, RM);
+ else
+ expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12, RM);
+ return;
+ }
+
+ if (RM != Reloc::PIC_) {
+ expandLoadStackGuardBase(MI, ARM::MOVi32imm, ARM::LDRi12, RM);
+ return;
+ }
+
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+
+ if (!Subtarget.GVIsIndirectSymbol(GV, RM)) {
+ expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12, RM);
+ return;
+ }
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Reg = MI->getOperand(0).getReg();
+ MachineInstrBuilder MIB;
+
+ MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg)
+ .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
+ unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
+ MIB.addMemOperand(MMO);
+ MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg);
+ MIB.addReg(Reg, RegState::Kill).addImm(0);
+ MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ AddDefaultPred(MIB);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
new file mode 100644
index 0000000..90f34ea
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -0,0 +1,48 @@
+//===-- ARMInstrInfo.h - ARM Instruction Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMINSTRINFO_H
+
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMInstrInfo : public ARMBaseInstrInfo {
+ ARMRegisterInfo RI;
+public:
+ explicit ARMInstrInfo(const ARMSubtarget &STI);
+
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const override;
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const override;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const ARMRegisterInfo &getRegisterInfo() const override { return RI; }
+
+private:
+ void expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const override;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
new file mode 100644
index 0000000..b9de83b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -0,0 +1,5756 @@
+//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM specific DAG Nodes.
+//
+
+// Type profiles.
+def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
+def SDT_ARMStructByVal : SDTypeProfile<0, 4,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
+
+def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMCMov : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+
+def SDT_ARMBrcond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+
+def SDT_ARMBrJT : SDTypeProfile<0, 2,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
+
+def SDT_ARMBr2JT : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>, SDTCisVT<4, i32>,
+ SDTCisVT<5, OtherVT>]>;
+
+def SDT_ARMAnd : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
+
+def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
+ SDTCisInt<2>]>;
+def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+def SDT_ARMEH_SJLJ_SetupDispatch: SDTypeProfile<0, 0, []>;
+
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
+ SDTCisInt<1>]>;
+
+def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+def SDT_WIN__DBZCHK : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
+
+def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, CPSR = op LHS, RHS, CPSR
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+
+def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >;
+def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>;
+def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>;
+
+// Node definitions.
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
+def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntUnaryOp>;
+
+def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
+ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPOptInGlue, SDNPOutGlue]>;
+def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" ,
+ SDT_ARMStructByVal,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPMayStore, SDNPMayLoad]>;
+
+def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+ [SDNPInGlue]>;
+
+def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
+def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
+ [SDNPHasChain]>;
+def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
+ [SDNPHasChain]>;
+
+def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
+ [SDNPHasChain]>;
+
+def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+ [SDNPOutGlue]>;
+
+def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp,
+ [SDNPOutGlue]>;
+
+def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
+ [SDNPOutGlue, SDNPCommutative]>;
+
+def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+
+def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
+
+def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>;
+def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>;
+def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>;
+
+def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
+def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
+ SDT_ARMEH_SJLJ_Setjmp,
+ [SDNPHasChain, SDNPSideEffect]>;
+def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
+ SDT_ARMEH_SJLJ_Longjmp,
+ [SDNPHasChain, SDNPSideEffect]>;
+def ARMeh_sjlj_setup_dispatch: SDNode<"ARMISD::EH_SJLJ_SETUP_DISPATCH",
+ SDT_ARMEH_SJLJ_SetupDispatch,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
+ [SDNPHasChain, SDNPSideEffect]>;
+def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+
+def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+
+def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPMayStore, SDNPMayLoad]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction Predicate Definitions.
+//
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
+ AssemblerPredicate<"HasV4TOps", "armv4t">;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">,
+ AssemblerPredicate<"HasV5TOps", "armv5t">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
+ AssemblerPredicate<"HasV5TEOps", "armv5te">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
+ AssemblerPredicate<"HasV6Ops", "armv6">;
+def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6M : Predicate<"Subtarget->hasV6MOps()">,
+ AssemblerPredicate<"HasV6MOps",
+ "armv6m or armv6t2">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
+ AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV6K : Predicate<"Subtarget->hasV6KOps()">,
+ AssemblerPredicate<"HasV6KOps", "armv6k">;
+def NoV6K : Predicate<"!Subtarget->hasV6KOps()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
+ AssemblerPredicate<"HasV7Ops", "armv7">;
+def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
+ AssemblerPredicate<"HasV8Ops", "armv8">;
+def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
+ AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
+def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
+ AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
+ AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
+ AssemblerPredicate<"FeatureVFP2", "VFP2">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
+ AssemblerPredicate<"FeatureVFP3", "VFP3">;
+def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
+ AssemblerPredicate<"FeatureVFP4", "VFP4">;
+def HasDPVFP : Predicate<"!Subtarget->isFPOnlySP()">,
+ AssemblerPredicate<"!FeatureVFPOnlySP",
+ "double precision VFP">;
+def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
+ AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">,
+ AssemblerPredicate<"FeatureNEON", "NEON">;
+def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
+ AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasCRC : Predicate<"Subtarget->hasCRC()">,
+ AssemblerPredicate<"FeatureCRC", "crc">;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">,
+ AssemblerPredicate<"FeatureFP16","half-float conversions">;
+def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
+ AssemblerPredicate<"FeatureFullFP16","full half-float">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">,
+ AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
+def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
+ AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
+ AssemblerPredicate<"FeatureT2XtPk",
+ "pack/extract">;
+def HasDSP : Predicate<"Subtarget->hasDSP()">,
+ AssemblerPredicate<"FeatureDSP", "dsp">;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
+ AssemblerPredicate<"FeatureDB",
+ "data-barriers">;
+def HasMP : Predicate<"Subtarget->hasMPExtension()">,
+ AssemblerPredicate<"FeatureMP",
+ "mp-extensions">;
+def HasVirtualization: Predicate<"false">,
+ AssemblerPredicate<"FeatureVirtualization",
+ "virtualization-extensions">;
+def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">,
+ AssemblerPredicate<"FeatureTrustZone",
+ "TrustZone">;
+def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">,
+ AssemblerPredicate<"ModeThumb", "thumb">;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
+ AssemblerPredicate<"ModeThumb,FeatureThumb2",
+ "thumb2">;
+def IsMClass : Predicate<"Subtarget->isMClass()">,
+ AssemblerPredicate<"FeatureMClass", "armv*m">;
+def IsNotMClass : Predicate<"!Subtarget->isMClass()">,
+ AssemblerPredicate<"!FeatureMClass",
+ "!armv*m">;
+def IsARM : Predicate<"!Subtarget->isThumb()">,
+ AssemblerPredicate<"!ModeThumb", "arm-mode">;
+def IsMachO : Predicate<"Subtarget->isTargetMachO()">;
+def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
+def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
+ AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
+def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
+
+// FIXME: Eventually this will be just "hasV6T2Ops".
+def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
+def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+def UseMulOps : Predicate<"Subtarget->useMulOps()">;
+
+// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
+// But only select them if more precision in FP computation is allowed.
+// Do not use them for Darwin platforms.
+def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
+ " FPOpFusion::Fast && "
+ " Subtarget->hasVFP4()) && "
+ "!Subtarget->isTargetDarwin()">;
+def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion =="
+ " FPOpFusion::Fast &&"
+ " Subtarget->hasVFP4()) || "
+ "Subtarget->isTargetDarwin()">;
+
+// VGETLNi32 is microcoded on Swift - prefer VMOV.
+def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">;
+def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">;
+
+// VDUP.32 is microcoded on Swift - prefer VMOV.
+def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">;
+def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
+
+// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as
+// this allows more effective execution domain optimization. See
+// setExecutionDomain().
+def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
+
+def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
+def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
+
+//===----------------------------------------------------------------------===//
+// ARM Flag Definitions.
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+//===----------------------------------------------------------------------===//
+// ARM specific transformation functions and pattern fragments.
+//
+
+// imm_neg_XFORM - Return the negation of an i32 immediate value.
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+// imm_not_XFORM - Return the complement of a i32 immediate value.
+def imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
+}]>;
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
+
+/// Split a 32-bit immediate into two 16 bit parts.
+def hi16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, SDLoc(N),
+ MVT::i32);
+}]>;
+
+def lo16AllZero : PatLeaf<(i32 imm), [{
+ // Returns true if all low 16-bits are 0.
+ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
+}], hi16>;
+
+class BinOpWithFlagFrag<dag res> :
+ PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'xor' node with a single use.
+def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'fmul' node with a single use.
+def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
+ return N->hasOneUse();
+}]>;
+
+// An 'fadd' node which checks for single non-hazardous use.
+def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+// An 'fsub' node which checks for single non-hazardous use.
+def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// Immediate operands with a shared generic asm render method.
+class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; }
+
+// Operands that are part of a memory addressing mode.
+class MemOperand : Operand<i32> { let OperandType = "OPERAND_MEMORY"; }
+
+// Branch target.
+// FIXME: rename brtarget to t2_brtarget
+def brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeT2BROperand";
+}
+
+// FIXME: get rid of this one?
+def uncondbrtarget : Operand<OtherVT> {
+ let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Branch target for ARM. Handles conditional/unconditional
+def br_target : Operand<OtherVT> {
+ let EncoderMethod = "getARMBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Call target.
+// FIXME: rename bltarget to t2_bl_target?
+def bltarget : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Call target for ARM. Handles conditional/unconditional
+// FIXME: rename bl_target to t2_bltarget?
+def bl_target : Operand<i32> {
+ let EncoderMethod = "getARMBLTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+def blx_target : Operand<i32> {
+ let EncoderMethod = "getARMBLXTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// A list of registers separated by comma. Used by load/store multiple.
+def RegListAsmOperand : AsmOperandClass { let Name = "RegList"; }
+def reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = RegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeRegListOperand";
+}
+
+def GPRPairOp : RegisterOperand<GPRPair, "printGPRPairOperand">;
+
+def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; }
+def dpr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = DPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeDPRRegListOperand";
+}
+
+def SPRRegListAsmOperand : AsmOperandClass { let Name = "SPRRegList"; }
+def spr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = SPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeSPRRegListOperand";
+}
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ let PrintMethod = "printCPInstOperand";
+}
+
+// Local PC labels.
+def pclabel : Operand<i32> {
+ let PrintMethod = "printPCLabel";
+}
+
+// ADR instruction labels.
+def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; }
+def adrlabel : Operand<i32> {
+ let EncoderMethod = "getAdrLabelOpValue";
+ let ParserMatchClass = AdrLabelAsmOperand;
+ let PrintMethod = "printAdrLabelOperand<0>";
+}
+
+def neon_vcvt_imm32 : Operand<i32> {
+ let EncoderMethod = "getNEONVcvtImm32OpValue";
+ let DecoderMethod = "DecodeVCVTImmOperand";
+}
+
+// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
+def rot_imm_XFORM: SDNodeXForm<imm, [{
+ switch (N->getZExtValue()){
+ default: llvm_unreachable(nullptr);
+ case 0: return CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ case 8: return CurDAG->getTargetConstant(1, SDLoc(N), MVT::i32);
+ case 16: return CurDAG->getTargetConstant(2, SDLoc(N), MVT::i32);
+ case 24: return CurDAG->getTargetConstant(3, SDLoc(N), MVT::i32);
+ }
+}]>;
+def RotImmAsmOperand : AsmOperandClass {
+ let Name = "RotImm";
+ let ParserMethod = "parseRotImm";
+}
+def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
+ int32_t v = N->getZExtValue();
+ return v == 8 || v == 16 || v == 24; }],
+ rot_imm_XFORM> {
+ let PrintMethod = "printRotImmOperand";
+ let ParserMatchClass = RotImmAsmOperand;
+}
+
+// shift_imm: An integer that encodes a shift amount and the type of shift
+// (asr or lsl). The 6-bit immediate encodes as:
+// {5} 0 ==> lsl
+// 1 asr
+// {4-0} imm5 shift amount.
+// asr #32 encoded as imm5 == 0.
+def ShifterImmAsmOperand : AsmOperandClass {
+ let Name = "ShifterImm";
+ let ParserMethod = "parseShifterImm";
+}
+def shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+ let ParserMatchClass = ShifterImmAsmOperand;
+}
+
+// shifter_operand operands: so_reg_reg, so_reg_imm, and mod_imm.
+def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; }
+def so_reg_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectRegShifterOperand",
+ [shl, srl, sra, rotr]> {
+ let EncoderMethod = "getSORegRegOpValue";
+ let PrintMethod = "printSORegRegOperand";
+ let DecoderMethod = "DecodeSORegRegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
+ let MIOperandInfo = (ops GPRnopc, GPRnopc, i32imm);
+}
+
+def ShiftedImmAsmOperand : AsmOperandClass { let Name = "RegShiftedImm"; }
+def so_reg_imm : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectImmShifterOperand",
+ [shl, srl, sra, rotr]> {
+ let EncoderMethod = "getSORegImmOpValue";
+ let PrintMethod = "printSORegImmOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// FIXME: Does this need to be distinct from so_reg?
+def shift_so_reg_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShiftRegShifterOperand",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegRegOpValue";
+ let PrintMethod = "printSORegRegOperand";
+ let DecoderMethod = "DecodeSORegRegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+
+// FIXME: Does this need to be distinct from so_reg?
+def shift_so_reg_imm : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 2, "SelectShiftImmShifterOperand",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegImmOpValue";
+ let PrintMethod = "printSORegImmOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// mod_imm: match a 32-bit immediate operand, which can be encoded into
+// a 12-bit immediate; an 8-bit integer and a 4-bit rotator (See ARMARM
+// - "Modified Immediate Constants"). Within the MC layer we keep this
+// immediate in its encoded form.
+def ModImmAsmOperand: AsmOperandClass {
+ let Name = "ModImm";
+ let ParserMethod = "parseModImm";
+}
+def mod_imm : Operand<i32>, ImmLeaf<i32, [{
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }]> {
+ let EncoderMethod = "getModImmOpValue";
+ let PrintMethod = "printModImmOperand";
+ let ParserMatchClass = ModImmAsmOperand;
+}
+
+// Note: the patterns mod_imm_not and mod_imm_neg do not require an encoder
+// method and such, as they are only used on aliases (Pat<> and InstAlias<>).
+// The actual parsing, encoding, decoding are handled by the destination
+// instructions, which use mod_imm.
+
+def ModImmNotAsmOperand : AsmOperandClass { let Name = "ModImmNot"; }
+def mod_imm_not : Operand<i32>, PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
+ }], imm_not_XFORM> {
+ let ParserMatchClass = ModImmNotAsmOperand;
+}
+
+def ModImmNegAsmOperand : AsmOperandClass { let Name = "ModImmNeg"; }
+def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ unsigned Value = -(unsigned)N->getZExtValue();
+ return Value && ARM_AM::getSOImmVal(Value) != -1;
+ }], imm_neg_XFORM> {
+ let ParserMatchClass = ModImmNegAsmOperand;
+}
+
+/// arm_i32imm - True for +V6T2, or when isSOImmTwoParVal()
+def arm_i32imm : PatLeaf<(imm), [{
+ if (Subtarget->useMovt(*MF))
+ return true;
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+}]>;
+
+/// imm0_1 predicate - Immediate in the range [0,1].
+def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; }
+def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
+
+/// imm0_3 predicate - Immediate in the range [0,3].
+def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
+def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
+
+/// imm0_7 predicate - Immediate in the range [0,7].
+def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
+def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 8;
+}]> {
+ let ParserMatchClass = Imm0_7AsmOperand;
+}
+
+/// imm8 predicate - Immediate is exactly 8.
+def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; }
+def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
+ let ParserMatchClass = Imm8AsmOperand;
+}
+
+/// imm16 predicate - Immediate is exactly 16.
+def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; }
+def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> {
+ let ParserMatchClass = Imm16AsmOperand;
+}
+
+/// imm32 predicate - Immediate is exactly 32.
+def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; }
+def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
+ let ParserMatchClass = Imm32AsmOperand;
+}
+
+def imm8_or_16 : ImmLeaf<i32, [{ return Imm == 8 || Imm == 16;}]>;
+
+/// imm1_7 predicate - Immediate in the range [1,7].
+def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
+def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
+ let ParserMatchClass = Imm1_7AsmOperand;
+}
+
+/// imm1_15 predicate - Immediate in the range [1,15].
+def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; }
+def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> {
+ let ParserMatchClass = Imm1_15AsmOperand;
+}
+
+/// imm1_31 predicate - Immediate in the range [1,31].
+def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; }
+def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
+ let ParserMatchClass = Imm1_31AsmOperand;
+}
+
+/// imm0_15 predicate - Immediate in the range [0,15].
+def Imm0_15AsmOperand: ImmAsmOperand {
+ let Name = "Imm0_15";
+ let DiagnosticType = "ImmRange0_15";
+}
+def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 16;
+}]> {
+ let ParserMatchClass = Imm0_15AsmOperand;
+}
+
+/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
+def Imm0_31AsmOperand: ImmAsmOperand { let Name = "Imm0_31"; }
+def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
+}]> {
+ let ParserMatchClass = Imm0_31AsmOperand;
+}
+
+/// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32].
+def Imm0_32AsmOperand: ImmAsmOperand { let Name = "Imm0_32"; }
+def imm0_32 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
+}]> {
+ let ParserMatchClass = Imm0_32AsmOperand;
+}
+
+/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63].
+def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; }
+def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 64;
+}]> {
+ let ParserMatchClass = Imm0_63AsmOperand;
+}
+
+/// imm0_239 predicate - Immediate in the range [0,239].
+def Imm0_239AsmOperand : ImmAsmOperand {
+ let Name = "Imm0_239";
+ let DiagnosticType = "ImmRange0_239";
+}
+def imm0_239 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 240; }]> {
+ let ParserMatchClass = Imm0_239AsmOperand;
+}
+
+/// imm0_255 predicate - Immediate in the range [0,255].
+def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
+def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
+ let ParserMatchClass = Imm0_255AsmOperand;
+}
+
+/// imm0_65535 - An immediate is in the range [0.65535].
+def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; }
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 65536;
+}]> {
+ let ParserMatchClass = Imm0_65535AsmOperand;
+}
+
+// imm0_65535_neg - An immediate whose negative value is in the range [0.65535].
+def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{
+ return -Imm >= 0 && -Imm < 65536;
+}]>;
+
+// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference
+// a relocatable expression.
+//
+// FIXME: This really needs a Thumb version separate from the ARM version.
+// While the range is the same, and can thus use the same match class,
+// the encoding is different so it should have a different encoder method.
+def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; }
+def imm0_65535_expr : Operand<i32> {
+ let EncoderMethod = "getHiLo16ImmOpValue";
+ let ParserMatchClass = Imm0_65535ExprAsmOperand;
+}
+
+def Imm256_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm256_65535Expr"; }
+def imm256_65535_expr : Operand<i32> {
+ let ParserMatchClass = Imm256_65535ExprAsmOperand;
+}
+
+/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
+def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; }
+def imm24b : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm <= 0xffffff;
+}]> {
+ let ParserMatchClass = Imm24bitAsmOperand;
+}
+
+
+/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
+/// e.g., 0xf000ffff
+def BitfieldAsmOperand : AsmOperandClass {
+ let Name = "Bitfield";
+ let ParserMethod = "parseBitfield";
+}
+
+def bf_inv_mask_imm : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM::isBitFieldInvertedMask(N->getZExtValue());
+}] > {
+ let EncoderMethod = "getBitfieldInvertedMaskOpValue";
+ let PrintMethod = "printBitfieldInvMaskImmOperand";
+ let DecoderMethod = "DecodeBitfieldMaskOperand";
+ let ParserMatchClass = BitfieldAsmOperand;
+}
+
+def imm1_32_XFORM: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N),
+ MVT::i32);
+}]>;
+def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; }
+def imm1_32 : Operand<i32>, PatLeaf<(imm), [{
+ uint64_t Imm = N->getZExtValue();
+ return Imm > 0 && Imm <= 32;
+ }],
+ imm1_32_XFORM> {
+ let PrintMethod = "printImmPlusOneOperand";
+ let ParserMatchClass = Imm1_32AsmOperand;
+}
+
+def imm1_16_XFORM: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N),
+ MVT::i32);
+}]>;
+def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; }
+def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
+ imm1_16_XFORM> {
+ let PrintMethod = "printImmPlusOneOperand";
+ let ParserMatchClass = Imm1_16AsmOperand;
+}
+
+// Define ARM specific addressing modes.
+// addrmode_imm12 := reg +/- imm12
+//
+def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
+class AddrMode_Imm12 : MemOperand,
+ ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
+ // 12-bit immediate operand. Note that instructions using this encode
+ // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
+ // immediate values are as normal.
+
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let DecoderMethod = "DecodeAddrModeImm12Operand";
+ let ParserMatchClass = MemImm12OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+def addrmode_imm12 : AddrMode_Imm12 {
+ let PrintMethod = "printAddrModeImm12Operand<false>";
+}
+
+def addrmode_imm12_pre : AddrMode_Imm12 {
+ let PrintMethod = "printAddrModeImm12Operand<true>";
+}
+
+// ldst_so_reg := reg +/- reg shop imm
+//
+def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
+def ldst_so_reg : MemOperand,
+ ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
+ let EncoderMethod = "getLdStSORegOpValue";
+ // FIXME: Simplify the printer
+ let PrintMethod = "printAddrMode2Operand";
+ let DecoderMethod = "DecodeSORegMemOperand";
+ let ParserMatchClass = MemRegOffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPRnopc:$offsreg, i32imm:$shift);
+}
+
+// postidx_imm8 := +/- [0,255]
+//
+// 9 bit value:
+// {8} 1 is imm8 is non-negative. 0 otherwise.
+// {7-0} [0,255] imm8 value.
+def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; }
+def postidx_imm8 : MemOperand {
+ let PrintMethod = "printPostIdxImm8Operand";
+ let ParserMatchClass = PostIdxImm8AsmOperand;
+ let MIOperandInfo = (ops i32imm);
+}
+
+// postidx_imm8s4 := +/- [0,1020]
+//
+// 9 bit value:
+// {8} 1 is imm8 is non-negative. 0 otherwise.
+// {7-0} [0,255] imm8 value, scaled by 4.
+def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; }
+def postidx_imm8s4 : MemOperand {
+ let PrintMethod = "printPostIdxImm8s4Operand";
+ let ParserMatchClass = PostIdxImm8s4AsmOperand;
+ let MIOperandInfo = (ops i32imm);
+}
+
+
+// postidx_reg := +/- reg
+//
+def PostIdxRegAsmOperand : AsmOperandClass {
+ let Name = "PostIdxReg";
+ let ParserMethod = "parsePostIdxReg";
+}
+def postidx_reg : MemOperand {
+ let EncoderMethod = "getPostIdxRegOpValue";
+ let DecoderMethod = "DecodePostIdxReg";
+ let PrintMethod = "printPostIdxRegOperand";
+ let ParserMatchClass = PostIdxRegAsmOperand;
+ let MIOperandInfo = (ops GPRnopc, i32imm);
+}
+
+
+// addrmode2 := reg +/- imm12
+// := reg +/- reg shop imm
+//
+// FIXME: addrmode2 should be refactored the rest of the way to always
+// use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg).
+def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; }
+def addrmode2 : MemOperand,
+ ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+ let EncoderMethod = "getAddrMode2OpValue";
+ let PrintMethod = "printAddrMode2Operand";
+ let ParserMatchClass = AddrMode2AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def PostIdxRegShiftedAsmOperand : AsmOperandClass {
+ let Name = "PostIdxRegShifted";
+ let ParserMethod = "parsePostIdxReg";
+}
+def am2offset_reg : MemOperand,
+ ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode2OffsetOpValue";
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ // When using this for assembly, it's always as a post-index offset.
+ let ParserMatchClass = PostIdxRegShiftedAsmOperand;
+ let MIOperandInfo = (ops GPRnopc, i32imm);
+}
+
+// FIXME: am2offset_imm should only need the immediate, not the GPR. Having
+// the GPR is purely vestigal at this point.
+def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; }
+def am2offset_imm : MemOperand,
+ ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode2OffsetOpValue";
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ let ParserMatchClass = AM2OffsetImmAsmOperand;
+ let MIOperandInfo = (ops GPRnopc, i32imm);
+}
+
+
+// addrmode3 := reg +/- reg
+// addrmode3 := reg +/- imm8
+//
+// FIXME: split into imm vs. reg versions.
+def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
+class AddrMode3 : MemOperand,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+ let EncoderMethod = "getAddrMode3OpValue";
+ let ParserMatchClass = AddrMode3AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def addrmode3 : AddrMode3
+{
+ let PrintMethod = "printAddrMode3Operand<false>";
+}
+
+def addrmode3_pre : AddrMode3
+{
+ let PrintMethod = "printAddrMode3Operand<true>";
+}
+
+// FIXME: split into imm vs. reg versions.
+// FIXME: parser method to handle +/- register.
+def AM3OffsetAsmOperand : AsmOperandClass {
+ let Name = "AM3Offset";
+ let ParserMethod = "parseAM3Offset";
+}
+def am3offset : MemOperand,
+ ComplexPattern<i32, 2, "SelectAddrMode3Offset",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode3OffsetOpValue";
+ let PrintMethod = "printAddrMode3OffsetOperand";
+ let ParserMatchClass = AM3OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// ldstm_mode := {ia, ib, da, db}
+//
+def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
+ let EncoderMethod = "getLdStmModeOpValue";
+ let PrintMethod = "printLdStmModeOperand";
+}
+
+// addrmode5 := reg +/- imm8*4
+//
+def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
+class AddrMode5 : MemOperand,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
+ let EncoderMethod = "getAddrMode5OpValue";
+ let DecoderMethod = "DecodeAddrMode5Operand";
+ let ParserMatchClass = AddrMode5AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm);
+}
+
+def addrmode5 : AddrMode5 {
+ let PrintMethod = "printAddrMode5Operand<false>";
+}
+
+def addrmode5_pre : AddrMode5 {
+ let PrintMethod = "printAddrMode5Operand<true>";
+}
+
+// addrmode6 := reg with optional alignment
+//
+def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
+def addrmode6 : MemOperand,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
+ let EncoderMethod = "getAddrMode6AddressOpValue";
+ let DecoderMethod = "DecodeAddrMode6Operand";
+ let ParserMatchClass = AddrMode6AsmOperand;
+}
+
+def am6offset : MemOperand,
+ ComplexPattern<i32, 1, "SelectAddrMode6Offset",
+ [], [SDNPWantRoot]> {
+ let PrintMethod = "printAddrMode6OffsetOperand";
+ let MIOperandInfo = (ops GPR);
+ let EncoderMethod = "getAddrMode6OffsetOpValue";
+ let DecoderMethod = "DecodeGPRRegisterClass";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VST1/VLD1
+// (single element from one lane) for size 32.
+def addrmode6oneL32 : MemOperand,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6OneLane32AddressOpValue";
+}
+
+// Base class for addrmode6 with specific alignment restrictions.
+class AddrMode6Align : MemOperand,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
+ let EncoderMethod = "getAddrMode6AddressOpValue";
+ let DecoderMethod = "DecodeAddrMode6Operand";
+}
+
+// Special version of addrmode6 to handle no allowed alignment encoding for
+// VLD/VST instructions and checking the alignment is not specified.
+def AddrMode6AlignNoneAsmOperand : AsmOperandClass {
+ let Name = "AlignedMemoryNone";
+ let DiagnosticType = "AlignedMemoryRequiresNone";
+}
+def addrmode6alignNone : AddrMode6Align {
+ // The alignment specifier can only be omitted.
+ let ParserMatchClass = AddrMode6AlignNoneAsmOperand;
+}
+
+// Special version of addrmode6 to handle 16-bit alignment encoding for
+// VLD/VST instructions and checking the alignment value.
+def AddrMode6Align16AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory16";
+ let DiagnosticType = "AlignedMemoryRequires16";
+}
+def addrmode6align16 : AddrMode6Align {
+ // The alignment specifier can only be 16 or omitted.
+ let ParserMatchClass = AddrMode6Align16AsmOperand;
+}
+
+// Special version of addrmode6 to handle 32-bit alignment encoding for
+// VLD/VST instructions and checking the alignment value.
+def AddrMode6Align32AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory32";
+ let DiagnosticType = "AlignedMemoryRequires32";
+}
+def addrmode6align32 : AddrMode6Align {
+ // The alignment specifier can only be 32 or omitted.
+ let ParserMatchClass = AddrMode6Align32AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit alignment encoding for
+// VLD/VST instructions and checking the alignment value.
+def AddrMode6Align64AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory64";
+ let DiagnosticType = "AlignedMemoryRequires64";
+}
+def addrmode6align64 : AddrMode6Align {
+ // The alignment specifier can only be 64 or omitted.
+ let ParserMatchClass = AddrMode6Align64AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding
+// for VLD/VST instructions and checking the alignment value.
+def AddrMode6Align64or128AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory64or128";
+ let DiagnosticType = "AlignedMemoryRequires64or128";
+}
+def addrmode6align64or128 : AddrMode6Align {
+ // The alignment specifier can only be 64, 128 or omitted.
+ let ParserMatchClass = AddrMode6Align64or128AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit, 128-bit or 256-bit alignment
+// encoding for VLD/VST instructions and checking the alignment value.
+def AddrMode6Align64or128or256AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory64or128or256";
+ let DiagnosticType = "AlignedMemoryRequires64or128or256";
+}
+def addrmode6align64or128or256 : AddrMode6Align {
+ // The alignment specifier can only be 64, 128, 256 or omitted.
+ let ParserMatchClass = AddrMode6Align64or128or256AsmOperand;
+}
+
+// Special version of addrmode6 to handle alignment encoding for VLD-dup
+// instructions, specifically VLD4-dup.
+def addrmode6dup : MemOperand,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6DupAddressOpValue";
+ // FIXME: This is close, but not quite right. The alignment specifier is
+ // different.
+ let ParserMatchClass = AddrMode6AsmOperand;
+}
+
+// Base class for addrmode6dup with specific alignment restrictions.
+class AddrMode6DupAlign : MemOperand,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6DupAddressOpValue";
+}
+
+// Special version of addrmode6 to handle no allowed alignment encoding for
+// VLD-dup instruction and checking the alignment is not specified.
+def AddrMode6dupAlignNoneAsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemoryNone";
+ let DiagnosticType = "DupAlignedMemoryRequiresNone";
+}
+def addrmode6dupalignNone : AddrMode6DupAlign {
+ // The alignment specifier can only be omitted.
+ let ParserMatchClass = AddrMode6dupAlignNoneAsmOperand;
+}
+
+// Special version of addrmode6 to handle 16-bit alignment encoding for VLD-dup
+// instruction and checking the alignment value.
+def AddrMode6dupAlign16AsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemory16";
+ let DiagnosticType = "DupAlignedMemoryRequires16";
+}
+def addrmode6dupalign16 : AddrMode6DupAlign {
+ // The alignment specifier can only be 16 or omitted.
+ let ParserMatchClass = AddrMode6dupAlign16AsmOperand;
+}
+
+// Special version of addrmode6 to handle 32-bit alignment encoding for VLD-dup
+// instruction and checking the alignment value.
+def AddrMode6dupAlign32AsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemory32";
+ let DiagnosticType = "DupAlignedMemoryRequires32";
+}
+def addrmode6dupalign32 : AddrMode6DupAlign {
+ // The alignment specifier can only be 32 or omitted.
+ let ParserMatchClass = AddrMode6dupAlign32AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit alignment encoding for VLD
+// instructions and checking the alignment value.
+def AddrMode6dupAlign64AsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemory64";
+ let DiagnosticType = "DupAlignedMemoryRequires64";
+}
+def addrmode6dupalign64 : AddrMode6DupAlign {
+ // The alignment specifier can only be 64 or omitted.
+ let ParserMatchClass = AddrMode6dupAlign64AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding
+// for VLD instructions and checking the alignment value.
+def AddrMode6dupAlign64or128AsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemory64or128";
+ let DiagnosticType = "DupAlignedMemoryRequires64or128";
+}
+def addrmode6dupalign64or128 : AddrMode6DupAlign {
+ // The alignment specifier can only be 64, 128 or omitted.
+ let ParserMatchClass = AddrMode6dupAlign64or128AsmOperand;
+}
+
+// addrmodepc := pc + reg
+//
+def addrmodepc : MemOperand,
+ ComplexPattern<i32, 2, "SelectAddrModePC", []> {
+ let PrintMethod = "printAddrModePCOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addr_offset_none := reg
+//
+def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; }
+def addr_offset_none : MemOperand,
+ ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> {
+ let PrintMethod = "printAddrMode7Operand";
+ let DecoderMethod = "DecodeAddrMode7Operand";
+ let ParserMatchClass = MemNoOffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base);
+}
+
+def nohash_imm : Operand<i32> {
+ let PrintMethod = "printNoHashImmediate";
+}
+
+def CoprocNumAsmOperand : AsmOperandClass {
+ let Name = "CoprocNum";
+ let ParserMethod = "parseCoprocNumOperand";
+}
+def p_imm : Operand<i32> {
+ let PrintMethod = "printPImmediate";
+ let ParserMatchClass = CoprocNumAsmOperand;
+ let DecoderMethod = "DecodeCoprocessor";
+}
+
+def CoprocRegAsmOperand : AsmOperandClass {
+ let Name = "CoprocReg";
+ let ParserMethod = "parseCoprocRegOperand";
+}
+def c_imm : Operand<i32> {
+ let PrintMethod = "printCImmediate";
+ let ParserMatchClass = CoprocRegAsmOperand;
+}
+def CoprocOptionAsmOperand : AsmOperandClass {
+ let Name = "CoprocOption";
+ let ParserMethod = "parseCoprocOptionOperand";
+}
+def coproc_option_imm : Operand<i32> {
+ let PrintMethod = "printCoprocOptionImm";
+ let ParserMatchClass = CoprocOptionAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+/// AsI1_bin_irs - Defines a set of (op r, {mod_imm|r|so_reg}) patterns for a
+/// binop that produces a value.
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
+multiclass AsI1_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, mod_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+ }
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+}
+
+/// AsI1_rbin_irs - Same as AsI1_bin_irs except the order of operands are
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the AsI1_bin_irs counterpart.
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
+multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode mod_imm:$imm, GPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+ }
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [/* pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+}
+
+/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p),
+ 4, iii,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, mod_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]>;
+
+ def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p),
+ 4, iir,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ let isCommutable = Commutable;
+ }
+ def rsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
+ so_reg_imm:$shift))]>,
+ Sched<[WriteALUsi, ReadALU]>;
+
+ def rsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
+ so_reg_reg:$shift))]>,
+ Sched<[WriteALUSsr, ReadALUsr]>;
+}
+}
+
+/// AsI1_rbin_s_is - Same as AsI1_bin_s_irs, except selection DAG
+/// operands are reversed.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p),
+ 4, iii,
+ [(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]>;
+
+ def rsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift,
+ GPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]>;
+
+ def rsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift,
+ GPR:$Rn))]>,
+ Sched<[WriteALUSsr, ReadALUsr]>;
+}
+}
+
+/// AI1_cmp_irs - Defines a set of (op r, {mod_imm|r|so_reg}) cmp / test
+/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
+/// a explicit result, only implicitly set CPSR.
+let isCompare = 1, Defs = [CPSR] in {
+multiclass AI1_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0,
+ string rrDecoderMethod = ""> {
+ def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii,
+ opc, "\t$Rn, $imm",
+ [(opnode GPR:$Rn, mod_imm:$imm)]>,
+ Sched<[WriteCMP, ReadALU]> {
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = imm;
+
+ let Unpredictable{15-12} = 0b1111;
+ }
+ def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
+ opc, "\t$Rn, $Rm",
+ [(opnode GPR:$Rn, GPR:$Rm)]>,
+ Sched<[WriteCMP, ReadALU, ReadALU]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = Commutable;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ let DecoderMethod = rrDecoderMethod;
+
+ let Unpredictable{15-12} = 0b1111;
+ }
+ def rsi : AI1<opcod, (outs),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPR:$Rn, so_reg_imm:$shift)]>,
+ Sched<[WriteCMPsi, ReadALU]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+
+ let Unpredictable{15-12} = 0b1111;
+ }
+ def rsr : AI1<opcod, (outs),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]>,
+ Sched<[WriteCMPsr, ReadALU]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+
+ let Unpredictable{15-12} = 0b1111;
+ }
+
+}
+}
+
+/// AI_ext_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
+class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
+ [(set GPRnopc:$Rd, (opnode (rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{3-0} = Rm;
+}
+
+class AI_ext_rrot_np<bits<8> opcod, string opc>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot", []>,
+ Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> {
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{11-10} = rot;
+ }
+
+/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot",
+ [(set GPRnopc:$Rd, (opnode GPR:$Rn,
+ (rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
+}
+
+class AI_exta_rrot_np<bits<8> opcod, string opc>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
+ Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> {
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{11-10} = rot;
+}
+
+/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, mod_imm:$imm, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+ def rsr : AsI1<opcod, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPRnopc:$Rd, CPSR,
+ (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+ }
+}
+
+/// AI1_rsc_irs - Define instructions and patterns for rsc
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
+multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [/* pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+ def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn, CPSR))]>,
+ Requires<[IsARM]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+ }
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPR:$Rt), (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1nopc<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt),
+ (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt),
+ (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
+
+
+multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AI2ldst<0b010, 0, isByte, (outs),
+ (ins GPR:$Rt, addrmode_imm12:$addr),
+ AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+ [(opnode GPR:$Rt, addrmode_imm12:$addr)]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPR:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPR:$Rt, ldst_so_reg:$shift)]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+
+multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AI2ldst<0b010, 0, isByte, (outs),
+ (ins GPRnopc:$Rt, addrmode_imm12:$addr),
+ AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+ [(opnode GPRnopc:$Rt, addrmode_imm12:$addr)]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 0, isByte, (outs),
+ (ins GPRnopc:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in
+/// the function. The first operand is the ID# for this instruction, the second
+/// is the index into the MachineConstantPool that this is, the third is the
+/// size in bytes of this constant pool entry.
+let hasSideEffects = 0, isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+
+/// A jumptable consisting of direct 32-bit addresses of the destination basic
+/// blocks (either absolute, or relative to the start of the jump-table in PIC
+/// mode). Used mostly in ARM and Thumb-1 modes.
+def JUMPTABLE_ADDRS :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+
+/// A jumptable consisting of 32-bit jump instructions. Used for Thumb-2 tables
+/// that cannot be optimised to use TBB or TBH.
+def JUMPTABLE_INSTS :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+
+/// A jumptable consisting of 8-bit unsigned integers representing offsets from
+/// a TBB instruction.
+def JUMPTABLE_TBB :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+
+/// A jumptable consisting of 16-bit unsigned integers representing offsets from
+/// a TBH instruction.
+def JUMPTABLE_TBH :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+
+
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def ADJCALLSTACKUP :
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
+ [(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
+
+def ADJCALLSTACKDOWN :
+PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
+ [(ARMcallseq_start timm:$amt)]>;
+}
+
+def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary,
+ "hint", "\t$imm", [(int_arm_hint imm0_239:$imm)]>,
+ Requires<[IsARM, HasV6]> {
+ bits<8> imm;
+ let Inst{27-8} = 0b00110010000011110000;
+ let Inst{7-0} = imm;
+}
+
+def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>;
+
+def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
+ "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{27-20} = 0b01101000;
+ let Inst{7-4} = 0b1011;
+ let Inst{11-8} = 0b1111;
+ let Unpredictable{11-8} = 0b1111;
+}
+
+// The 16-bit operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
+ "bkpt", "\t$val", []>, Requires<[IsARM]> {
+ bits<16> val;
+ let Inst{3-0} = val{3-0};
+ let Inst{19-8} = val{15-4};
+ let Inst{27-20} = 0b00010010;
+ let Inst{31-28} = 0xe; // AL
+ let Inst{7-4} = 0b0111;
+}
+// default immediate for breakpoint mnemonic
+def : InstAlias<"bkpt", (BKPT 0)>, Requires<[IsARM]>;
+
+def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
+ "hlt", "\t$val", []>, Requires<[IsARM, HasV8]> {
+ bits<16> val;
+ let Inst{3-0} = val{3-0};
+ let Inst{19-8} = val{15-4};
+ let Inst{27-20} = 0b00010000;
+ let Inst{31-28} = 0xe; // AL
+ let Inst{7-4} = 0b0111;
+}
+
+// Change Processor State
+// FIXME: We should use InstAlias to handle the optional operands.
+class CPS<dag iops, string asm_ops>
+ : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops),
+ []>, Requires<[IsARM]> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010000;
+ let Inst{19-18} = imod;
+ let Inst{17} = M; // Enabled if mode is set;
+ let Inst{16-9} = 0b00000000;
+ let Inst{8-6} = iflags;
+ let Inst{5} = 0;
+ let Inst{4-0} = mode;
+}
+
+let DecoderMethod = "DecodeCPSInstruction" in {
+let M = 1 in
+ def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, imm0_31:$mode),
+ "$imod\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">;
+
+let imod = 0, iflags = 0, M = 1 in
+ def CPS1p : CPS<(ins imm0_31:$mode), "\t$mode">;
+}
+
+// Preload signals the memory system of possible future data/instruction access.
+multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
+
+ def i12 : AXIM<(outs), (ins addrmode_imm12:$addr), AddrMode_i12, MiscFrm,
+ IIC_Preload, !strconcat(opc, "\t$addr"),
+ [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]>,
+ Sched<[WritePreLd]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 0; // 0 for immediate form
+ let Inst{24} = data;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+
+ def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$shift"),
+ [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]>,
+ Sched<[WritePreLd]> {
+ bits<17> shift;
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 1; // 1 for register form
+ let Inst{24} = data;
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = shift{11-0};
+ let Inst{4} = 0;
+ }
+}
+
+defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>;
+defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
+defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>;
+
+def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary,
+ "setend\t$end", []>, Requires<[IsARM]>, Deprecated<HasV8Ops> {
+ bits<1> end;
+ let Inst{31-10} = 0b1111000100000001000000;
+ let Inst{9} = end;
+ let Inst{8-0} = 0;
+}
+
+def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
+ [(int_arm_dbg imm0_15:$opt)]>, Requires<[IsARM, HasV7]> {
+ bits<4> opt;
+ let Inst{27-4} = 0b001100100000111100001111;
+ let Inst{3-0} = opt;
+}
+
+// A8.8.247 UDF - Undefined (Encoding A1)
+def UDF : AInoP<(outs), (ins imm0_65535:$imm16), MiscFrm, NoItinerary,
+ "udf", "\t$imm16", [(int_arm_undefined imm0_65535:$imm16)]> {
+ bits<16> imm16;
+ let Inst{31-28} = 0b1110; // AL
+ let Inst{27-25} = 0b011;
+ let Inst{24-20} = 0b11111;
+ let Inst{19-8} = imm16{15-4};
+ let Inst{7-4} = 0b1111;
+ let Inst{3-0} = imm16{3-0};
+}
+
+/*
+ * A5.4 Permanently UNDEFINED instructions.
+ *
+ * For most targets use UDF #65006, for which the OS will generate SIGTRAP.
+ * Other UDF encodings generate SIGILL.
+ *
+ * NaCl's OS instead chooses an ARM UDF encoding that's also a UDF in Thumb.
+ * Encoding A1:
+ * 1110 0111 1111 iiii iiii iiii 1111 iiii
+ * Encoding T1:
+ * 1101 1110 iiii iiii
+ * It uses the following encoding:
+ * 1110 0111 1111 1110 1101 1110 1111 0000
+ * - In ARM: UDF #60896;
+ * - In Thumb: UDF #254 followed by a branch-to-self.
+ */
+let isBarrier = 1, isTerminator = 1 in
+def TRAPNaCl : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
+ Requires<[IsARM,UseNaClTrap]> {
+ let Inst = 0xe7fedef0;
+}
+let isBarrier = 1, isTerminator = 1 in
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
+ Requires<[IsARM,DontUseNaClTrap]> {
+ let Inst = 0xe7ffdefe;
+}
+
+// Address computation and loads and stores in PIC mode.
+let isNotDuplicable = 1 in {
+def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
+ 4, IIC_iALUr,
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>,
+ Sched<[WriteALU, ReadALU]>;
+
+let AddedComplexity = 10 in {
+def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_r,
+ [(set GPR:$dst, (load addrmodepc:$addr))]>;
+
+def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>;
+
+def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>;
+}
+let AddedComplexity = 10 in {
+def PICSTR : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
+
+def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
+ addrmodepc:$addr)]>;
+
+def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+}
+} // isNotDuplicable = 1
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+let hasSideEffects = 0, isReMaterializable = 1 in
+// The 'adr' mnemonic encodes differently if the label is before or after
+// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
+// know until then which form of the instruction will be used.
+def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
+ MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []>,
+ Sched<[WriteALU, ReadALU]> {
+ bits<4> Rd;
+ bits<14> label;
+ let Inst{27-25} = 0b001;
+ let Inst{24} = 0;
+ let Inst{23-22} = label{13-12};
+ let Inst{21} = 0;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = label{11-0};
+}
+
+let hasSideEffects = 1 in {
+def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
+ 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
+
+def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$label, pred:$p),
+ 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ // ARMV4T and above
+ def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "bx", "\tlr", [(ARMretflag)]>,
+ Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> {
+ let Inst{27-0} = 0b0001001011111111111100011110;
+ }
+
+ // ARMV4 only
+ def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "mov", "\tpc, lr", [(ARMretflag)]>,
+ Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> {
+ let Inst{27-0} = 0b0001101000001111000000001110;
+ }
+
+ // Exception return: N.b. doesn't set CPSR as far as we're concerned (it sets
+ // the user-space one).
+ def SUBS_PC_LR : ARMPseudoInst<(outs), (ins i32imm:$offset, pred:$p),
+ 4, IIC_Br,
+ [(ARMintretflag imm:$offset)]>;
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ // ARMV4T and above
+ def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
+ [(brind GPR:$dst)]>,
+ Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> {
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+
+ def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br,
+ "bx", "\t$dst", [/* pattern left blank */]>,
+ Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> {
+ bits<4> dst;
+ let Inst{27-4} = 0b000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+}
+
+// SP is marked as a use to prevent stack-pointer assignments that appear
+// immediately before calls from potentially appearing dead.
+let isCall = 1,
+ // FIXME: Do we really need a non-predicated version? If so, it should
+ // at least be a pseudo instruction expanding to the predicated version
+ // at MC lowering time.
+ Defs = [LR], Uses = [SP] in {
+ def BL : ABXI<0b1011, (outs), (ins bl_target:$func),
+ IIC_Br, "bl\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsARM]>, Sched<[WriteBrL]> {
+ let Inst{31-28} = 0b1110;
+ bits<24> func;
+ let Inst{23-0} = func;
+ let DecoderMethod = "DecodeBranchImmInstruction";
+ }
+
+ def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func),
+ IIC_Br, "bl", "\t$func",
+ [(ARMcall_pred tglobaladdr:$func)]>,
+ Requires<[IsARM]>, Sched<[WriteBrL]> {
+ bits<24> func;
+ let Inst{23-0} = func;
+ let DecoderMethod = "DecodeBranchImmInstruction";
+ }
+
+ // ARMv5T and above
+ def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm,
+ IIC_Br, "blx\t$func",
+ [(ARMcall GPR:$func)]>,
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
+ bits<4> func;
+ let Inst{31-4} = 0b1110000100101111111111110011;
+ let Inst{3-0} = func;
+ }
+
+ def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm,
+ IIC_Br, "blx", "\t$func",
+ [(ARMcall_pred GPR:$func)]>,
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
+ bits<4> func;
+ let Inst{27-4} = 0b000100101111111111110011;
+ let Inst{3-0} = func;
+ }
+
+ // ARMv4T
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T]>, Sched<[WriteBr]>;
+
+ // ARMv4
+ def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>;
+
+ // mov lr, pc; b if callee is marked noreturn to avoid confusing the
+ // return stack predictor.
+ def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func),
+ 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
+ Requires<[IsARM]>, Sched<[WriteBr]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: should be able to write a pattern for ARMBrcond, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
+ IIC_Br, "b", "\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>,
+ Sched<[WriteBr]> {
+ bits<24> target;
+ let Inst{23-0} = target;
+ let DecoderMethod = "DecodeBranchImmInstruction";
+ }
+
+ let isBarrier = 1 in {
+ // B is "predicable" since it's just a Bcc with an 'always' condition.
+ let isPredicable = 1 in
+ // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly
+ // should be sufficient.
+ // FIXME: Is B really a Barrier? That doesn't seem right.
+ def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br,
+ [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>,
+ Sched<[WriteBr]>;
+
+ let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
+ def BR_JTr : ARMPseudoInst<(outs),
+ (ins GPR:$target, i32imm:$jt),
+ 0, IIC_Br,
+ [(ARMbrjt GPR:$target, tjumptable:$jt)]>,
+ Sched<[WriteBr]>;
+ // FIXME: This shouldn't use the generic "addrmode2," but rather be split
+ // into i12 and rs suffixed versions.
+ def BR_JTm : ARMPseudoInst<(outs),
+ (ins addrmode2:$target, i32imm:$jt),
+ 0, IIC_Br,
+ [(ARMbrjt (i32 (load addrmode2:$target)),
+ tjumptable:$jt)]>, Sched<[WriteBrTbl]>;
+ def BR_JTadd : ARMPseudoInst<(outs),
+ (ins GPR:$target, GPR:$idx, i32imm:$jt),
+ 0, IIC_Br,
+ [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt)]>,
+ Sched<[WriteBrTbl]>;
+ } // isNotDuplicable = 1, isIndirectBranch = 1
+ } // isBarrier = 1
+
+}
+
+// BLX (immediate)
+def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary,
+ "blx\t$target", []>,
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
+ let Inst{31-25} = 0b1111101;
+ bits<25> target;
+ let Inst{23-0} = target{24-1};
+ let Inst{24} = target{0};
+ let isCall = 1;
+}
+
+// Branch and Exchange Jazelle
+def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+ [/* pattern left blank */]>, Sched<[WriteBr]> {
+ bits<4> func;
+ let Inst{23-20} = 0b0010;
+ let Inst{19-8} = 0xfff;
+ let Inst{7-4} = 0b0010;
+ let Inst{3-0} = func;
+ let isBranch = 1;
+}
+
+// Tail calls.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>,
+ Sched<[WriteBr]>;
+
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>,
+ Sched<[WriteBr]>;
+
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM]>, Sched<[WriteBr]>;
+
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>, Sched<[WriteBr]>,
+ Requires<[IsARM]>;
+}
+
+// Secure Monitor Call is a system instruction.
+def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
+ []>, Requires<[IsARM, HasTrustZone]> {
+ bits<4> opt;
+ let Inst{23-4} = 0b01100000000000000111;
+ let Inst{3-0} = opt;
+}
+def : MnemonicAlias<"smi", "smc">;
+
+// Supervisor Call (Software Interrupt)
+let isCall = 1, Uses = [SP] in {
+def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []>,
+ Sched<[WriteBr]> {
+ bits<24> svc;
+ let Inst{23-0} = svc;
+}
+}
+
+// Store Return State
+class SRSI<bit wb, string asm>
+ : XI<(outs), (ins imm0_31:$mode), AddrModeNone, 4, IndexModeNone, BrFrm,
+ NoItinerary, asm, "", []> {
+ bits<5> mode;
+ let Inst{31-28} = 0b1111;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 1;
+ let Inst{21} = wb;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1101; // SP
+ let Inst{15-5} = 0b00000101000;
+ let Inst{4-0} = mode;
+}
+
+def SRSDA : SRSI<0, "srsda\tsp, $mode"> {
+ let Inst{24-23} = 0;
+}
+def SRSDA_UPD : SRSI<1, "srsda\tsp!, $mode"> {
+ let Inst{24-23} = 0;
+}
+def SRSDB : SRSI<0, "srsdb\tsp, $mode"> {
+ let Inst{24-23} = 0b10;
+}
+def SRSDB_UPD : SRSI<1, "srsdb\tsp!, $mode"> {
+ let Inst{24-23} = 0b10;
+}
+def SRSIA : SRSI<0, "srsia\tsp, $mode"> {
+ let Inst{24-23} = 0b01;
+}
+def SRSIA_UPD : SRSI<1, "srsia\tsp!, $mode"> {
+ let Inst{24-23} = 0b01;
+}
+def SRSIB : SRSI<0, "srsib\tsp, $mode"> {
+ let Inst{24-23} = 0b11;
+}
+def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> {
+ let Inst{24-23} = 0b11;
+}
+
+def : ARMInstAlias<"srsda $mode", (SRSDA imm0_31:$mode)>;
+def : ARMInstAlias<"srsda $mode!", (SRSDA_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsdb $mode", (SRSDB imm0_31:$mode)>;
+def : ARMInstAlias<"srsdb $mode!", (SRSDB_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsia $mode", (SRSIA imm0_31:$mode)>;
+def : ARMInstAlias<"srsia $mode!", (SRSIA_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsib $mode", (SRSIB imm0_31:$mode)>;
+def : ARMInstAlias<"srsib $mode!", (SRSIB_UPD imm0_31:$mode)>;
+
+// Return From Exception
+class RFEI<bit wb, string asm>
+ : XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm,
+ NoItinerary, asm, "", []> {
+ bits<4> Rn;
+ let Inst{31-28} = 0b1111;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 0;
+ let Inst{21} = wb;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = 0xa00;
+}
+
+def RFEDA : RFEI<0, "rfeda\t$Rn"> {
+ let Inst{24-23} = 0;
+}
+def RFEDA_UPD : RFEI<1, "rfeda\t$Rn!"> {
+ let Inst{24-23} = 0;
+}
+def RFEDB : RFEI<0, "rfedb\t$Rn"> {
+ let Inst{24-23} = 0b10;
+}
+def RFEDB_UPD : RFEI<1, "rfedb\t$Rn!"> {
+ let Inst{24-23} = 0b10;
+}
+def RFEIA : RFEI<0, "rfeia\t$Rn"> {
+ let Inst{24-23} = 0b01;
+}
+def RFEIA_UPD : RFEI<1, "rfeia\t$Rn!"> {
+ let Inst{24-23} = 0b01;
+}
+def RFEIB : RFEI<0, "rfeib\t$Rn"> {
+ let Inst{24-23} = 0b11;
+}
+def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> {
+ let Inst{24-23} = 0b11;
+}
+
+// Hypervisor Call is a system instruction
+let isCall = 1 in {
+def HVC : AInoP< (outs), (ins imm0_65535:$imm), BrFrm, NoItinerary,
+ "hvc", "\t$imm", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ bits<16> imm;
+
+ // Even though HVC isn't predicable, it's encoding includes a condition field.
+ // The instruction is undefined if the condition field is 0xf otherwise it is
+ // unpredictable if it isn't condition AL (0xe).
+ let Inst{31-28} = 0b1110;
+ let Unpredictable{31-28} = 0b1111;
+ let Inst{27-24} = 0b0001;
+ let Inst{23-20} = 0b0100;
+ let Inst{19-8} = imm{15-4};
+ let Inst{7-4} = 0b0111;
+ let Inst{3-0} = imm{3-0};
+}
+}
+
+// Return from exception in Hypervisor mode.
+let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
+def ERET : ABI<0b0001, (outs), (ins), NoItinerary, "eret", "", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ let Inst{23-0} = 0b011000000000000001101110;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / Store Instructions.
+//
+
+// Load
+
+
+defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
+ UnOpFrag<(load node:$Src)>>;
+defm LDRB : AI_ldr1nopc<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// Special LDR for loads from non-pc-relative constpools.
+let canFoldAsLoad = 1, mayLoad = 1, hasSideEffects = 0,
+ isReMaterializable = 1, isCodeGenOnly = 1 in
+def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
+ []> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+}
+
+// Loads with zero extension
+def LDRH : AI3ld<0b1011, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (zextloadi16 addrmode3:$addr))]>;
+
+// Loads with sign extension
+def LDRSH : AI3ld<0b1111, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
+
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
+ // Load doubleword
+ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode3:$addr),
+ LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $Rt2, $addr", []>,
+ Requires<[IsARM, HasV5TE]>;
+}
+
+def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "lda", "\t$Rt, $addr", []>;
+def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldab", "\t$Rt, $addr", []>;
+def LDAH : AIldracq<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldah", "\t$Rt, $addr", []>;
+
+// Indexed loads
+multiclass AI2_ldridx<bit isByte, string opc,
+ InstrItinClass iii, InstrItinClass iir> {
+ def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode_imm12_pre:$addr), IndexModePre, LdFrm, iii,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 0;
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{16-13};
+ let Inst{11-0} = addr{11-0};
+ let DecoderMethod = "DecodeLDRPreImm";
+ }
+
+ def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins ldst_so_reg:$addr), IndexModePre, LdFrm, iir,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 1;
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{16-13};
+ let Inst{11-0} = addr{11-0};
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeLDRPreReg";
+ }
+
+ def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, iir,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let Inst{4} = 0;
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+ def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, iii,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+}
+
+let mayLoad = 1, hasSideEffects = 0 in {
+// FIXME: for LDR_PRE_REG etc. the itineray should be either IIC_iLoad_ru or
+// IIC_iLoad_siu depending on whether it the offset register is shifted.
+defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>;
+}
+
+multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
+ def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode3_pre:$addr), IndexModePre,
+ LdMiscFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ }
+ def _POST : AI3ldstidx<op, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, LdMiscFrm, itin,
+ opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb",
+ []> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ }
+}
+
+let mayLoad = 1, hasSideEffects = 0 in {
+defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>;
+defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>;
+defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>;
+let hasExtraDefRegAllocReq = 1 in {
+def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins addrmode3_pre:$addr), IndexModePre,
+ LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+} // hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, hasSideEffects = 0
+
+// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT.
+let mayLoad = 1, hasSideEffects = 0 in {
+def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRT_POST_IMM
+ : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRBT_POST_REG : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRBT_POST_IMM
+ : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+multiclass AI3ldrT<bits<4> op, string opc> {
+ def i : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addr_offset_none:$addr, postidx_imm8:$offset),
+ IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
+ "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> {
+ bits<9> offset;
+ let Inst{23} = offset{8};
+ let Inst{22} = 1;
+ let Inst{11-8} = offset{7-4};
+ let Inst{3-0} = offset{3-0};
+ }
+ def r : AI3ldstidxT<op, 1, (outs GPRnopc:$Rt, GPRnopc:$base_wb),
+ (ins addr_offset_none:$addr, postidx_reg:$Rm),
+ IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
+ "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
+ bits<5> Rm;
+ let Inst{23} = Rm{4};
+ let Inst{22} = 0;
+ let Inst{11-8} = 0;
+ let Unpredictable{11-8} = 0b1111;
+ let Inst{3-0} = Rm{3-0};
+ let DecoderMethod = "DecodeLDR";
+ }
+}
+
+defm LDRSBT : AI3ldrT<0b1101, "ldrsbt">;
+defm LDRHT : AI3ldrT<0b1011, "ldrht">;
+defm LDRSHT : AI3ldrT<0b1111, "ldrsht">;
+}
+
+def LDRT_POST
+ : ARMAsmPseudo<"ldrt${q} $Rt, $addr", (ins addr_offset_none:$addr, pred:$q),
+ (outs GPR:$Rt)>;
+
+def LDRBT_POST
+ : ARMAsmPseudo<"ldrbt${q} $Rt, $addr", (ins addr_offset_none:$addr, pred:$q),
+ (outs GPR:$Rt)>;
+
+// Store
+
+// Stores with truncate
+def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
+ IIC_iStore_bh_r, "strh", "\t$Rt, $addr",
+ [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
+
+// Store doubleword
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
+ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
+ StMiscFrm, IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{21} = 0;
+ }
+}
+
+// Indexed stores
+multiclass AI2_stridx<bit isByte, string opc,
+ InstrItinClass iii, InstrItinClass iir> {
+ def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre,
+ StFrm, iii,
+ opc, "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 0;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{11-0} = addr{11-0}; // imm12
+ let DecoderMethod = "DecodeSTRPreImm";
+ }
+
+ def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, ldst_so_reg:$addr),
+ IndexModePre, StFrm, iir,
+ opc, "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 1;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{11-0} = addr{11-0};
+ let Inst{4} = 0; // Inst{4} = 0
+ let DecoderMethod = "DecodeSTRPreReg";
+ }
+ def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, iir,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let Inst{4} = 0;
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+ def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, iii,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+}
+
+let mayStore = 1, hasSideEffects = 0 in {
+// FIXME: for STR_PRE_REG etc. the itineray should be either IIC_iStore_ru or
+// IIC_iStore_siu depending on whether it the offset register is shifted.
+defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>;
+defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_iu, IIC_iStore_bh_ru>;
+}
+
+def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset),
+ (STR_POST_REG GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset)>;
+def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset),
+ (STR_POST_IMM GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset)>;
+def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset),
+ (STRB_POST_REG GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset)>;
+def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset),
+ (STRB_POST_IMM GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset)>;
+
+// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
+// put the patterns on the instruction definitions directly as ISel wants
+// the address base and offset to be separate operands, not a single
+// complex operand like we represent the instructions themselves. The
+// pseudos map between the two.
+let usesCustomInserter = 1,
+ Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in {
+def STRi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>;
+def STRr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>;
+def STRBi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>;
+def STRBr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>;
+def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
+}
+
+
+
+def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre,
+ StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
+ addr_offset_none:$addr,
+ am3offset:$offset))]> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
+def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr),
+ IndexModePre, StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr,
+ am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$Rt, $Rt2, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
+
+// STRT, STRBT, and STRHT
+
+def STRBT_POST_REG : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def STRBT_POST_IMM
+ : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def STRBT_POST
+ : ARMAsmPseudo<"strbt${q} $Rt, $addr",
+ (ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>;
+
+let mayStore = 1, hasSideEffects = 0 in {
+def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def STRT_POST_IMM
+ : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+}
+
+def STRT_POST
+ : ARMAsmPseudo<"strt${q} $Rt, $addr",
+ (ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>;
+
+multiclass AI3strT<bits<4> op, string opc> {
+ def i : AI3ldstidxT<op, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, postidx_imm8:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc,
+ "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> {
+ bits<9> offset;
+ let Inst{23} = offset{8};
+ let Inst{22} = 1;
+ let Inst{11-8} = offset{7-4};
+ let Inst{3-0} = offset{3-0};
+ }
+ def r : AI3ldstidxT<op, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, postidx_reg:$Rm),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc,
+ "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
+ bits<5> Rm;
+ let Inst{23} = Rm{4};
+ let Inst{22} = 0;
+ let Inst{11-8} = 0;
+ let Inst{3-0} = Rm{3-0};
+ }
+}
+
+
+defm STRHT : AI3strT<0b1011, "strht">;
+
+def STL : AIstrrel<0b00, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stl", "\t$Rt, $addr", []>;
+def STLB : AIstrrel<0b10, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlb", "\t$Rt, $addr", []>;
+def STLH : AIstrrel<0b11, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlh", "\t$Rt, $addr", []>;
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ // IA is the default, so no need for an explicit suffix on the
+ // mnemonic here. Without it is the canonical spelling.
+ def IA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
+ }
+ def DA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
+ }
+ def DB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
+ }
+ def IB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
+ }
+}
+
+let hasSideEffects = 0 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>, ComplexDeprecationPredicate<"ARMLoad">;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>,
+ ComplexDeprecationPredicate<"ARMStore">;
+
+} // hasSideEffects
+
+// FIXME: remove when we have a way to marking a MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ 4, IIC_iLoad_mBr, [],
+ (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
+ RegConstraint<"$Rn = $wb">;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let hasSideEffects = 0 in
+def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{19-16} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+}
+
+// A version for the smaller set of tail call registers.
+let hasSideEffects = 0 in
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
+ IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+}
+
+def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src),
+ DPSoRegRegFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src",
+ [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP,
+ Sched<[WriteALU]> {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-8} = src{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = src{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = src{3-0};
+ let Inst{25} = 0;
+}
+
+def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
+ DPSoRegImmFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>,
+ UnaryDP, Sched<[WriteALU]> {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-5} = src{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = src{3-0};
+ let Inst{25} = 0;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, IIC_iMOVi,
+ "mov", "\t$Rd, $imm", [(set GPR:$Rd, mod_imm:$imm)]>, UnaryDP,
+ Sched<[WriteALU]> {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-0} = imm;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm),
+ DPFrm, IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set GPR:$Rd, imm0_65535:$imm)]>,
+ Requires<[IsARM, HasV6T2]>, UnaryDP, Sched<[WriteALU]> {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+ let DecoderMethod = "DecodeArmMOVTWInstruction";
+}
+
+def : InstAlias<"mov${p} $Rd, $imm",
+ (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p)>,
+ Requires<[IsARM]>;
+
+def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+ Sched<[WriteALU]>;
+
+let Constraints = "$src = $Rd" in {
+def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
+ (ins GPR:$src, imm0_65535_expr:$imm),
+ DPFrm, IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set GPRnopc:$Rd,
+ (or (and GPR:$src, 0xffff),
+ lo16AllZero:$imm))]>, UnaryDP,
+ Requires<[IsARM, HasV6T2]>, Sched<[WriteALU]> {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+ let DecoderMethod = "DecodeArmMOVTWInstruction";
+}
+
+def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+ Sched<[WriteALU]>;
+
+} // Constraints
+
+def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
+ Requires<[IsARM, HasV6T2]>;
+
+let Uses = [CPSR] in
+def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
+ [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
+ Requires<[IsARM]>, Sched<[WriteALU]>;
+
+// These aren't really mov instructions, but we have to define them this way
+// due to flag operands.
+
+let Defs = [CPSR] in {
+def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
+ Sched<[WriteALU]>, Requires<[IsARM]>;
+def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
+ Sched<[WriteALU]>, Requires<[IsARM]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+def SXTB : AI_ext_rrot<0b01101010,
+ "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+def SXTH : AI_ext_rrot<0b01101011,
+ "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+
+def SXTAB : AI_exta_rrot<0b01101010,
+ "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+def SXTAH : AI_exta_rrot<0b01101011,
+ "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+
+def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
+
+def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+def UXTB : AI_ext_rrot<0b01101110,
+ "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+def UXTH : AI_ext_rrot<0b01101111,
+ "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+def UXTB16 : AI_ext_rrot<0b01101100,
+ "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
+// (UXTB16r_rot GPR:$Src, 3)>;
+def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
+ (UXTB16 GPR:$Src, 1)>;
+
+def UXTAB : AI_exta_rrot<0b01101110, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+def UXTAH : AI_exta_rrot<0b01101111, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+}
+
+// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
+def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
+
+
+def SBFX : I<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111101;
+ let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
+}
+
+def UBFX : I<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111111;
+ let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm ADD : AsI1_bin_irs<0b0100, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+defm SUB : AsI1_bin_irs<0b0010, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// ADD and SUB with 's' bit set.
+//
+// Currently, ADDS/SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real ADD/SUB opcodes by
+// AdjustInstrPostInstrSelection where we determine whether or not to
+// set the "s" bit based on CPSR liveness.
+//
+// FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen
+// support for an optional CPSR definition that corresponds to the DAG
+// node's second value. We can then eliminate the implicit def of CPSR.
+defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+
+defm ADC : AI1_adde_sube_irs<0b0101, "adc",
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
+defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
+
+defm RSB : AsI1_rbin_irs<0b0011, "rsb",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// FIXME: Eliminate them if we can write def : Pat patterns which defines
+// CPSR and the implicit def of CPSR is not needed.
+defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+
+defm RSC : AI1_rsc_irs<0b0111, "rsc",
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+def : ARMPat<(add GPR:$src, mod_imm_neg:$imm),
+ (SUBri GPR:$src, mod_imm_neg:$imm)>;
+def : ARMPat<(ARMaddc GPR:$src, mod_imm_neg:$imm),
+ (SUBSri GPR:$src, mod_imm_neg:$imm)>;
+
+def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm),
+ (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>,
+ Requires<[IsARM, HasV6T2]>;
+def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm),
+ (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>,
+ Requires<[IsARM, HasV6T2]>;
+
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+def : ARMPat<(ARMadde GPR:$src, mod_imm_not:$imm, CPSR),
+ (SBCri GPR:$src, mod_imm_not:$imm)>;
+def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR),
+ (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>,
+ Requires<[IsARM, HasV6T2]>;
+
+// Note: These are implemented in C++ code, because they have to generate
+// ADD/SUBrs instructions, which use a complex pattern that a xform function
+// cannot produce.
+// (mul X, 2^n+1) -> (add (X << n), X)
+// (mul X, 2^n-1) -> (rsb X, (X << n))
+
+// ARM Arithmetic Instruction
+// GPR:$dst = GPR:$a op GPR:$b
+class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
+ list<dag> pattern = [],
+ dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ string asm = "\t$Rd, $Rn, $Rm">
+ : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ bits<4> Rn;
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = op27_20;
+ let Inst{11-4} = op11_4;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+
+ let Unpredictable{11-8} = 0b1111;
+}
+
+// Saturating add/subtract
+
+let DecoderMethod = "DecodeQADDInstruction" in
+def QADD : AAI<0b00010000, 0b00000101, "qadd",
+ [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
+
+def QSUB : AAI<0b00010010, 0b00000101, "qsub",
+ [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
+def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+
+def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">;
+def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">;
+def QASX : AAI<0b01100010, 0b11110011, "qasx">;
+def QSAX : AAI<0b01100010, 0b11110101, "qsax">;
+def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">;
+def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
+def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">;
+def UQASX : AAI<0b01100110, 0b11110011, "uqasx">;
+def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
+def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">;
+
+// Signed/Unsigned add/subtract
+
+def SASX : AAI<0b01100001, 0b11110011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
+def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">;
+def SSAX : AAI<0b01100001, 0b11110101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
+def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">;
+def UASX : AAI<0b01100101, 0b11110011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
+def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">;
+def USAX : AAI<0b01100101, 0b11110101, "usax">;
+def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
+def USUB8 : AAI<0b01100101, 0b11111111, "usub8">;
+
+// Signed/Unsigned halving add/subtract
+
+def SHASX : AAI<0b01100011, 0b11110011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
+def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">;
+def SHSAX : AAI<0b01100011, 0b11110101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
+def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">;
+def UHASX : AAI<0b01100111, 0b11110011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
+def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">;
+def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">;
+def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
+def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
+
+// Unsigned Sum of Absolute Differences [and Accumulate].
+
+def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ MulFrm /* for convenience */, NoItinerary, "usad8",
+ "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-20} = 0b01111000;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ MulFrm /* for convenience */, NoItinerary, "usada8",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]>{
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+ let Inst{27-20} = 0b01111000;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// Signed/Unsigned saturate
+
+def SSAT : AI<(outs GPRnopc:$Rd),
+ (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
+ let Inst{27-21} = 0b0110101;
+ let Inst{5-4} = 0b01;
+ let Inst{20-16} = sat_imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{4-0};
+ let Inst{6} = sh{5};
+ let Inst{3-0} = Rn;
+}
+
+def SSAT16 : AI<(outs GPRnopc:$Rd),
+ (ins imm1_16:$sat_imm, GPRnopc:$Rn), SatFrm,
+ NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
+ let Inst{27-20} = 0b01101010;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def USAT : AI<(outs GPRnopc:$Rd),
+ (ins imm0_31:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
+ let Inst{27-21} = 0b0110111;
+ let Inst{5-4} = 0b01;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{4-0};
+ let Inst{6} = sh{5};
+ let Inst{20-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def USAT16 : AI<(outs GPRnopc:$Rd),
+ (ins imm0_15:$sat_imm, GPRnopc:$Rn), SatFrm,
+ NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
+ let Inst{27-20} = 0b01101110;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos),
+ (SSAT imm1_32:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos),
+ (USAT imm0_31:$pos, GPRnopc:$a, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm AND : AsI1_bin_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm ORR : AsI1_bin_irs<0b1100, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
+defm EOR : AsI1_bin_irs<0b0001, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+defm BIC : AsI1_bin_irs<0b1110, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+
+// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just
+// like in the actual instruction encoding. The complexity of mapping the mask
+// to the lsb/msb pair should be handled by ISel, not encapsulated in the
+// instruction description.
+def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfc", "\t$Rd, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-0} = 0b0011111;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // msb
+}
+
+// A8.6.18 BFI - Bitfield insert (Encoding A1)
+def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
+ [(set GPRnopc:$Rd, (ARMbfi GPRnopc:$src, GPR:$Rn,
+ bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
+ let Inst{3-0} = Rn;
+}
+
+def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
+ "mvn", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP, Sched<[WriteALU]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+}
+def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP,
+ Sched<[WriteALU]> {
+ bits<4> Rd;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+}
+def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP,
+ Sched<[WriteALU]> {
+ bits<4> Rd;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm,
+ IIC_iMVNi, "mvn", "\t$Rd, $imm",
+ [(set GPR:$Rd, mod_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+}
+
+def : ARMPat<(and GPR:$src, mod_imm_not:$imm),
+ (BICri GPR:$src, mod_imm_not:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+class AsMul1I32<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AsMla1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// FIXME: The v5 pseudos are only necessary for the additional Constraint
+// property. Remove them when it's possible to add those properties
+// on an individual MachineInstr, not just an instruction description.
+let isCommutable = 1, TwoOperandAliasConstraint = "$Rn = $Rd" in {
+def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b0000;
+ let Unpredictable{15-12} = 0b1111;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
+ pred:$p, cc_out:$s),
+ 4, IIC_iMUL32,
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
+ (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6, UseMulOps]>;
+}
+
+def MLA : AsMul1I32<0b0000001, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra),
+ IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))]>,
+ Requires<[IsARM, HasV6, UseMulOps]> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MLAv5: ARMPseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra,
+ pred:$p, cc_out:$s), 4, IIC_iMAC32,
+ [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))],
+ (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6T2, UseMulOps]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<4> Ra;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// Extra precision multiplies with low / high results
+let hasSideEffects = 0 in {
+let isCommutable = 1 in {
+def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMUL64, [],
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMUL64, [],
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+}
+}
+
+// Multiply + accumulate
+def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+let Constraints =
+ "@earlyclobber $RdLo,@earlyclobber $RdHi,$RLo = $RdLo,$RHi = $RdHi" in {
+def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
+ 4, IIC_iMAC64, [],
+ (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
+ pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
+ 4, IIC_iMAC64, [],
+ (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
+ pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+}
+
+} // hasSideEffects
+
+// Most significant word multiply
+def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6, UseMulOps]>;
+
+def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6, UseMulOps]>;
+
+def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6]>;
+
+multiclass AI_smul<string opc, PatFrag opnode> {
+ def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ []>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ []>,
+ Requires<[IsARM, HasV5TE]>;
+}
+
+
+multiclass AI_smla<string opc, PatFrag opnode> {
+ let DecoderMethod = "DecodeSMLAInstruction" in {
+ def BB : AMulxyIa<0b0001000, 0b00, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (opnode (sext_inreg GPRnopc:$Rn, i16),
+ (sext_inreg GPRnopc:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
+
+ def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16),
+ (sra GPRnopc:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
+
+ def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
+ (sext_inreg GPRnopc:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
+
+ def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
+ (sra GPRnopc:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
+
+ def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ []>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
+
+ def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ []>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
+ }
+}
+
+defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiply accumulate long: SMLAL<x><y>.
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+// Helper class for AI_smld.
+class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-23} = 0b01110;
+ let Inst{22} = long;
+ let Inst{21-20} = 0b00;
+ let Inst{11-8} = Rm;
+ let Inst{7} = 0;
+ let Inst{6} = sub;
+ let Inst{5} = swap;
+ let Inst{4} = 1;
+ let Inst{3-0} = Rn;
+}
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{19-16} = Rd;
+}
+class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Ra;
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+}
+class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+}
+
+multiclass AI_smld<bit sub, string opc> {
+
+ def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+ def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+ def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
+ !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
+
+ def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
+ !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
+
+}
+
+defm SMLA : AI_smld<0, "smla">;
+defm SMLS : AI_smld<1, "smls">;
+
+multiclass AI_sdml<bit sub, string opc> {
+
+ def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+}
+
+defm SMUA : AI_sdml<0, "smua">;
+defm SMUS : AI_sdml<1, "smus">;
+
+//===----------------------------------------------------------------------===//
+// Division Instructions (ARMv7-A with virtualization extension)
+//
+def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasDivideInARM]>;
+
+def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasDivideInARM]>;
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+def CLZ : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "clz", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>,
+ Sched<[WriteALU]>;
+
+def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rbit", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (bitreverse GPR:$Rm))]>,
+ Requires<[IsARM, HasV6T2]>,
+ Sched<[WriteALU]>;
+
+def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
+
+let AddedComplexity = 5 in
+def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev16", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
+
+def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)),
+ (REV16 (LDRH addrmode3:$addr))>;
+def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr),
+ (STRH (REV16 GPR:$Rn), addrmode3:$addr)>;
+
+let AddedComplexity = 5 in
+def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "revsh", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
+
+def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl GPR:$Rm, (i32 8)), 0xFF)),
+ (REVSH GPR:$Rm)>;
+
+def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_lsl_amt:$sh),
+ IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF),
+ (and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALUsi, ReadALU]>;
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)),
+ (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (shl GPRnopc:$Rm, imm16_31:$sh)),
+ (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, imm16_31:$sh)>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_asr_amt:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000),
+ (and (sra GPRnopc:$Rm, pkh_asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALUsi, ReadALU]>;
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+// We also can not replace a srl (17..31) by an arithmetic shift we would use in
+// pkhtb src1, src2, asr (17..31).
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+ (srl GPRnopc:$src2, imm16:$sh)),
+ (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16:$sh)>;
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+ (sra GPRnopc:$src2, imm16_31:$sh)),
+ (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>;
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+ (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>;
+
+//===----------------------------------------------------------------------===//
+// CRC Instructions
+//
+// Polynomials:
+// + CRC32{B,H,W} 0x04C11DB7
+// + CRC32C{B,H,W} 0x1EDC6F41
+//
+
+class AI_crc32<bit C, bits<2> sz, string suffix, SDPatternOperator builtin>
+ : AInoP<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), MiscFrm, NoItinerary,
+ !strconcat("crc32", suffix), "\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (builtin GPRnopc:$Rn, GPRnopc:$Rm))]>,
+ Requires<[IsARM, HasV8, HasCRC]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-28} = 0b1110;
+ let Inst{27-23} = 0b00010;
+ let Inst{22-21} = sz;
+ let Inst{20} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = 0b00;
+ let Inst{9} = C;
+ let Inst{8} = 0;
+ let Inst{7-4} = 0b0100;
+ let Inst{3-0} = Rm;
+
+ let Unpredictable{11-8} = 0b1101;
+}
+
+def CRC32B : AI_crc32<0, 0b00, "b", int_arm_crc32b>;
+def CRC32CB : AI_crc32<1, 0b00, "cb", int_arm_crc32cb>;
+def CRC32H : AI_crc32<0, 0b01, "h", int_arm_crc32h>;
+def CRC32CH : AI_crc32<1, 0b01, "ch", int_arm_crc32ch>;
+def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>;
+def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>;
+
+//===----------------------------------------------------------------------===//
+// ARMv8.1a Privilege Access Never extension
+//
+// SETPAN #imm1
+
+def SETPAN : AInoP<(outs), (ins imm0_1:$imm), MiscFrm, NoItinerary, "setpan",
+ "\t$imm", []>, Requires<[IsARM, HasV8, HasV8_1a]> {
+ bits<1> imm;
+
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010001;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-10} = 0b000000;
+ let Inst{9} = imm;
+ let Inst{8} = 0b0;
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = 0b0000;
+
+ let Unpredictable{19-16} = 0b1111;
+ let Unpredictable{15-10} = 0b111111;
+ let Unpredictable{8} = 0b1;
+ let Unpredictable{3-0} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+defm CMP : AI1_cmp_irs<0b1010, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+
+// ARMcmpZ can re-use the above instruction definitions.
+def : ARMPat<(ARMcmpZ GPR:$src, mod_imm:$imm),
+ (CMPri GPR:$src, mod_imm:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
+ (CMPrr GPR:$src, GPR:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs),
+ (CMPrsi GPR:$src, so_reg_imm:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs),
+ (CMPrsr GPR:$src, so_reg_reg:$rhs)>;
+
+// CMN register-integer
+let isCompare = 1, Defs = [CPSR] in {
+def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi,
+ "cmn", "\t$Rn, $imm",
+ [(ARMcmn GPR:$Rn, mod_imm:$imm)]>,
+ Sched<[WriteCMP, ReadALU]> {
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = imm;
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+// CMN register-register/shift
+def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr,
+ "cmn", "\t$Rn, $Rm",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPR:$Rn, GPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = 1;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+def CMNzrsi : AI1<0b1011, (outs),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr,
+ "cmn", "\t$Rn, $shift",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPR:$Rn, so_reg_imm:$shift)]>,
+ Sched<[WriteCMPsi, ReadALU]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+def CMNzrsr : AI1<0b1011, (outs),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr,
+ "cmn", "\t$Rn, $shift",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, so_reg_reg:$shift)]>,
+ Sched<[WriteCMPsr, ReadALU]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+}
+
+def : ARMPat<(ARMcmp GPR:$src, mod_imm_neg:$imm),
+ (CMNri GPR:$src, mod_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm),
+ (CMNri GPR:$src, mod_imm_neg:$imm)>;
+
+// Note that TST/TEQ don't set all the same flags that CMP does!
+defm TST : AI1_cmp_irs<0b1000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1,
+ "DecodeTSTInstruction">;
+defm TEQ : AI1_cmp_irs<0b1001, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
+
+// Pseudo i64 compares for some floating point compares.
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
+ Defs = [CPSR] in {
+def BCCi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>,
+ Sched<[WriteBr]>;
+
+def BCCZi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br,
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>,
+ Sched<[WriteBr]>;
+} // usesCustomInserter
+
+
+// Conditional moves
+let hasSideEffects = 0 in {
+
+let isCommutable = 1, isSelect = 1 in
+def MOVCCr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, GPR:$Rm, cmovpred:$p),
+ 4, IIC_iCMOVr,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg_imm:$shift, cmovpred:$p),
+ 4, IIC_iCMOVsr,
+ [(set GPR:$Rd,
+ (ARMcmov GPR:$false, so_reg_imm:$shift,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg_reg:$shift, cmovpred:$p),
+ 4, IIC_iCMOVsr,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+
+let isMoveImm = 1 in
+def MOVCCi16
+ : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+ 4, IIC_iMOVi,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, imm0_65535:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
+ Sched<[WriteALU]>;
+
+let isMoveImm = 1 in
+def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, mod_imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+// Two instruction predicate mov immediate.
+let isMoveImm = 1 in
+def MOVCCi32imm
+ : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm:$src, cmovpred:$p),
+ 8, IIC_iCMOVix2,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
+
+let isMoveImm = 1 in
+def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, mod_imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm_not:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+} // hasSideEffects
+
+
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+def MemBarrierOptOperand : AsmOperandClass {
+ let Name = "MemBarrierOpt";
+ let ParserMethod = "parseMemBarrierOptOperand";
+}
+def memb_opt : Operand<i32> {
+ let PrintMethod = "printMemBOption";
+ let ParserMatchClass = MemBarrierOptOperand;
+ let DecoderMethod = "DecodeMemBarrierOption";
+}
+
+def InstSyncBarrierOptOperand : AsmOperandClass {
+ let Name = "InstSyncBarrierOpt";
+ let ParserMethod = "parseInstSyncBarrierOptOperand";
+}
+def instsyncb_opt : Operand<i32> {
+ let PrintMethod = "printInstSyncBOption";
+ let ParserMatchClass = InstSyncBarrierOptOperand;
+ let DecoderMethod = "DecodeInstSyncBarrierOption";
+}
+
+// Memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff05;
+ let Inst{3-0} = opt;
+}
+
+def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff04;
+ let Inst{3-0} = opt;
+}
+
+// ISB has only full system option
+def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary,
+ "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff06;
+ let Inst{3-0} = opt;
+}
+}
+
+let usesCustomInserter = 1, Defs = [CPSR] in {
+
+// Pseudo instruction that combines movs + predicated rsbmi
+// to implement integer ABS
+ def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
+}
+
+let usesCustomInserter = 1 in {
+ def COPY_STRUCT_BYVAL_I32 : PseudoInst<
+ (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
+ NoItinerary,
+ [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
+}
+
+let hasPostISelHook = 1, Constraints = "$newdst = $dst, $newsrc = $src" in {
+ // %newsrc, %newdst = MEMCPY %dst, %src, N, ...N scratch regs...
+ // Copies N registers worth of memory from address %src to address %dst
+ // and returns the incremented addresses. N scratch register will
+ // be attached for the copy to use.
+ def MEMCPY : PseudoInst<
+ (outs GPR:$newdst, GPR:$newsrc),
+ (ins GPR:$dst, GPR:$src, i32imm:$nreg, variable_ops),
+ NoItinerary,
+ [(set GPR:$newdst, GPR:$newsrc,
+ (ARMmemcopy GPR:$dst, GPR:$src, imm:$nreg))]>;
+}
+
+def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def ldrex_2 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def ldrex_4 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def strex_1 : PatFrag<(ops node:$val, node:$ptr),
+ (int_arm_strex node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def strex_2 : PatFrag<(ops node:$val, node:$ptr),
+ (int_arm_strex node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def strex_4 : PatFrag<(ops node:$val, node:$ptr),
+ (int_arm_strex node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def ldaex_1 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def ldaex_2 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def ldaex_4 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def stlex_1 : PatFrag<(ops node:$val, node:$ptr),
+ (int_arm_stlex node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def stlex_2 : PatFrag<(ops node:$val, node:$ptr),
+ (int_arm_stlex node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def stlex_4 : PatFrag<(ops node:$val, node:$ptr),
+ (int_arm_stlex node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+let mayLoad = 1 in {
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldrexb", "\t$Rt, $addr",
+ [(set GPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>;
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldrexh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>;
+def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldrex", "\t$Rt, $addr",
+ [(set GPR:$Rt, (ldrex_4 addr_offset_none:$addr))]>;
+let hasExtraDefRegAllocReq = 1 in
+def LDREXD : AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+ NoItinerary, "ldrexd", "\t$Rt, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegLoad";
+}
+
+def LDAEXB : AIldaex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldaexb", "\t$Rt, $addr",
+ [(set GPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>;
+def LDAEXH : AIldaex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldaexh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>;
+def LDAEX : AIldaex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldaex", "\t$Rt, $addr",
+ [(set GPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>;
+let hasExtraDefRegAllocReq = 1 in
+def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+ NoItinerary, "ldaexd", "\t$Rt, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegLoad";
+}
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strexb", "\t$Rd, $Rt, $addr",
+ [(set GPR:$Rd, (strex_1 GPR:$Rt,
+ addr_offset_none:$addr))]>;
+def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strexh", "\t$Rd, $Rt, $addr",
+ [(set GPR:$Rd, (strex_2 GPR:$Rt,
+ addr_offset_none:$addr))]>;
+def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strex", "\t$Rd, $Rt, $addr",
+ [(set GPR:$Rd, (strex_4 GPR:$Rt,
+ addr_offset_none:$addr))]>;
+let hasExtraSrcRegAllocReq = 1 in
+def STREXD : AIstrex<0b01, (outs GPR:$Rd),
+ (ins GPRPairOp:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegStore";
+}
+def STLEXB: AIstlex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlexb", "\t$Rd, $Rt, $addr",
+ [(set GPR:$Rd,
+ (stlex_1 GPR:$Rt, addr_offset_none:$addr))]>;
+def STLEXH: AIstlex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlexh", "\t$Rd, $Rt, $addr",
+ [(set GPR:$Rd,
+ (stlex_2 GPR:$Rt, addr_offset_none:$addr))]>;
+def STLEX : AIstlex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlex", "\t$Rd, $Rt, $addr",
+ [(set GPR:$Rd,
+ (stlex_4 GPR:$Rt, addr_offset_none:$addr))]>;
+let hasExtraSrcRegAllocReq = 1 in
+def STLEXD : AIstlex<0b01, (outs GPR:$Rd),
+ (ins GPRPairOp:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlexd", "\t$Rd, $Rt, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegStore";
+}
+}
+
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
+ [(int_arm_clrex)]>,
+ Requires<[IsARM, HasV6K]> {
+ let Inst{31-0} = 0b11110101011111111111000000011111;
+}
+
+def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr),
+ (STREXB GPR:$Rt, addr_offset_none:$addr)>;
+def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
+ (STREXH GPR:$Rt, addr_offset_none:$addr)>;
+
+def : ARMPat<(stlex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr),
+ (STLEXB GPR:$Rt, addr_offset_none:$addr)>;
+def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
+ (STLEXH GPR:$Rt, addr_offset_none:$addr)>;
+
+class acquiring_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return isAtLeastAcquire(Ordering);
+}]>;
+
+def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
+def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
+def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
+
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return isAtLeastRelease(Ordering);
+}]>;
+
+def atomic_store_release_8 : releasing_store<atomic_store_8>;
+def atomic_store_release_16 : releasing_store<atomic_store_16>;
+def atomic_store_release_32 : releasing_store<atomic_store_32>;
+
+let AddedComplexity = 8 in {
+ def : ARMPat<(atomic_load_acquire_8 addr_offset_none:$addr), (LDAB addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_load_acquire_16 addr_offset_none:$addr), (LDAH addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_load_acquire_32 addr_offset_none:$addr), (LDA addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (STLB GPR:$val, addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (STLH GPR:$val, addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>;
+}
+
+// SWP/SWPB are deprecated in V6/V7.
+let mayLoad = 1, mayStore = 1 in {
+def SWP : AIswp<0, (outs GPRnopc:$Rt),
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>,
+ Requires<[PreV8]>;
+def SWPB: AIswp<1, (outs GPRnopc:$Rt),
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>,
+ Requires<[PreV8]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Coprocessor Instructions.
+//
+
+def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>,
+ Requires<[PreV8]> {
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>,
+ Requires<[PreV8]> {
+ let Inst{31-28} = 0b1111;
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+class ACI<dag oops, dag iops, string opc, string asm,
+ IndexMode im = IndexModeNone>
+ : I<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ opc, asm, "", []> {
+ let Inst{27-25} = 0b110;
+}
+class ACInoP<dag oops, dag iops, string opc, string asm,
+ IndexMode im = IndexModeNone>
+ : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ opc, asm, "", []> {
+ let Inst{31-28} = 0b1111;
+ let Inst{27-25} = 0b110;
+}
+multiclass LdStCop<bit load, bit Dbit, string asm> {
+ def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
+ asm, "\t$cop, $CRd, $addr!", IndexModePre> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _POST: ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _OPTION : ACI<(outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+}
+multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
+ def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
+ asm, "\t$cop, $CRd, $addr!", IndexModePre> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _POST: ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _OPTION : ACInoP<(outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+}
+
+defm LDC : LdStCop <1, 0, "ldc">;
+defm LDCL : LdStCop <1, 1, "ldcl">;
+defm STC : LdStCop <0, 0, "stc">;
+defm STCL : LdStCop <0, 1, "stcl">;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2">, Requires<[PreV8]>;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l">, Requires<[PreV8]>;
+defm STC2 : LdSt2Cop<0, 0, "stc2">, Requires<[PreV8]>;
+defm STC2L : LdSt2Cop<0, 1, "stc2l">, Requires<[PreV8]>;
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register.
+//
+
+class MovRCopro<string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : ABI<0b1110, oops, iops, NoItinerary, opc,
+ "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", pattern> {
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>,
+ ComplexDeprecationPredicate<"MCR">;
+def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
+def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
+ (outs GPRwithAPSR:$Rt),
+ (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
+ imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
+
+def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+class MovRCopro2<string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : ABXI<0b1110, oops, iops, NoItinerary,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> {
+ let Inst{31-24} = 0b11111110;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>,
+ Requires<[PreV8]>;
+def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
+def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
+ (outs GPRwithAPSR:$Rt),
+ (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
+ imm0_7:$opc2), []>,
+ Requires<[PreV8]>;
+def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
+
+def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
+ imm:$CRm, imm:$opc2),
+ (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag>
+ pattern = []>
+ : ABI<0b1100, oops, iops, NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm",
+ pattern> {
+
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
+ (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, c_imm:$CRm),
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, imm:$CRm)]>;
+def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */,
+ (outs GPRnopc:$Rt, GPRnopc:$Rt2),
+ (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>;
+
+class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
+ : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>,
+ Requires<[PreV8]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+
+ let DecoderMethod = "DecodeMRRC2";
+}
+
+def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, imm:$CRm)]>;
+def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register
+//
+
+// Move to ARM core register from Special Register
+def MRS : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary,
+ "mrs", "\t$Rd, apsr", []> {
+ bits<4> Rd;
+ let Inst{23-16} = 0b00001111;
+ let Unpredictable{19-17} = 0b111;
+
+ let Inst{15-12} = Rd;
+
+ let Inst{11-0} = 0b000000000000;
+ let Unpredictable{11-0} = 0b110100001111;
+}
+
+def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>,
+ Requires<[IsARM]>;
+
+// The MRSsys instruction is the MRS instruction from the ARM ARM,
+// section B9.3.9, with the R bit set to 1.
+def MRSsys : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary,
+ "mrs", "\t$Rd, spsr", []> {
+ bits<4> Rd;
+ let Inst{23-16} = 0b01001111;
+ let Unpredictable{19-16} = 0b1111;
+
+ let Inst{15-12} = Rd;
+
+ let Inst{11-0} = 0b000000000000;
+ let Unpredictable{11-0} = 0b110100001111;
+}
+
+// However, the MRS (banked register) system instruction (ARMv7VE) *does* have a
+// separate encoding (distinguished by bit 5.
+def MRSbanked : ABI<0b0001, (outs GPRnopc:$Rd), (ins banked_reg:$banked),
+ NoItinerary, "mrs", "\t$Rd, $banked", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ bits<6> banked;
+ bits<4> Rd;
+
+ let Inst{23} = 0;
+ let Inst{22} = banked{5}; // R bit
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = banked{3-0};
+ let Inst{15-12} = Rd;
+ let Inst{11-9} = 0b001;
+ let Inst{8} = banked{4};
+ let Inst{7-0} = 0b00000000;
+}
+
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions of MSR (immediate) or
+// MSR (register), the encodings are the same and the assembly parser has no way
+// to distinguish between them. The mask operand contains the special register
+// (R Bit) in bit 4 and bits 3-0 contains the mask with the fields to be
+// accessed in the special register.
+def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
+ "msr", "\t$mask, $Rn", []> {
+ bits<5> mask;
+ bits<4> Rn;
+
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rn;
+}
+
+def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, mod_imm:$imm), NoItinerary,
+ "msr", "\t$mask, $imm", []> {
+ bits<5> mask;
+ bits<12> imm;
+
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = imm;
+}
+
+// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a
+// separate encoding (distinguished by bit 5.
+def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn),
+ NoItinerary, "msr", "\t$banked, $Rn", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ bits<6> banked;
+ bits<4> Rn;
+
+ let Inst{23} = 0;
+ let Inst{22} = banked{5}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = banked{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-9} = 0b001;
+ let Inst{8} = banked{4};
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = Rn;
+}
+
+// Dynamic stack allocation yields a _chkstk for Windows targets. These calls
+// are needed to probe the stack when allocating more than
+// 4k bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having separate instruction are extra unmodelled effects
+// (compared to ordinary calls) like stack pointer change.
+
+def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
+ def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
+
+def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK,
+ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+let usesCustomInserter = 1, Defs = [CPSR] in
+ def WIN__DBZCHK : PseudoInst<(outs), (ins GPR:$divisor), NoItinerary,
+ [(win__dbzchk GPR:$divisor)]>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+// TPsoft is valid for ARM mode only, in case of Thumb mode a tTPsoft pattern
+// is defined in "ARMInstrThumb.td".
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+ def TPsoft : ARMPseudoInst<(outs), (ins), 4, IIC_Br,
+ [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved resgisters, which is exactly what we want.
+// A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, NoVFP]>;
+}
+
+// FIXME: Non-IOS version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+ Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+ NoItinerary,
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsARM]>;
+}
+
+let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1 in
+def Int_eh_sjlj_setup_dispatch : PseudoInst<(outs), (ins), NoItinerary,
+ [(ARMeh_sjlj_setup_dispatch)]>;
+
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for both ARM and Thumb. Any differences are handled when
+// the pseudo is expanded (which happens before any passes that need the
+// instruction size).
+let isBarrier = 1 in
+def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
+
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ARMv4 indirect branch using (MOVr PC, dst)
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
+ def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst),
+ 4, IIC_Br, [(brind GPR:$dst)],
+ (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>,
+ Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>;
+
+// Large immediate handling.
+
+// 32-bit immediate using two piece mod_imms or movw + movt.
+// This is a single pseudo instruction, the benefit is that it can be remat'd
+// as a single unit instead of having to handle reg inputs.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set GPR:$dst, (arm_i32imm:$src))]>,
+ Requires<[IsARM]>;
+
+def LDRLIT_ga_abs : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iLoad_i,
+ [(set GPR:$dst, (ARMWrapper tglobaladdr:$src))]>,
+ Requires<[IsARM, DontUseMovt]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+def LDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iLoadiALU,
+ [(set GPR:$dst,
+ (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsARM, DontUseMovt]>;
+
+let AddedComplexity = 10 in
+def LDRLIT_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ NoItinerary,
+ [(set GPR:$dst,
+ (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+ Requires<[IsARM, DontUseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2ld,
+ [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+ Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst),
+ (LEApcrelJT tjumptable:$dst)>;
+
+// TODO: add,sub,and, 3-instr forms?
+
+// Tail calls. These patterns also apply to Thumb mode.
+def : Pat<(ARMtcret tcGPR:$dst), (TCRETURNri tcGPR:$dst)>;
+def : Pat<(ARMtcret (i32 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>;
+def : Pat<(ARMtcret (i32 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>;
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>;
+def : ARMPat<(ARMcall_nolink texternalsym:$func),
+ (BMOVPCB_CALL texternalsym:$func)>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
+
+def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
+def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
+
+// smul* and smla*
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+
+def : ARMV5MOPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc,
+ (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc,
+ (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+
+
+// Pre-v7 uses MCR for synchronization barriers.
+def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
+ Requires<[IsARM, HasV6]>;
+
+// SXT/UXT with no rotate
+let AddedComplexity = 16 in {
+def : ARMV6Pat<(and GPR:$Src, 0x000000FF), (UXTB GPR:$Src, 0)>;
+def : ARMV6Pat<(and GPR:$Src, 0x0000FFFF), (UXTH GPR:$Src, 0)>;
+def : ARMV6Pat<(and GPR:$Src, 0x00FF00FF), (UXTB16 GPR:$Src, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0x00FF)),
+ (UXTAB GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0xFFFF)),
+ (UXTAH GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+def : ARMV6Pat<(sext_inreg GPR:$Src, i8), (SXTB GPR:$Src, 0)>;
+def : ARMV6Pat<(sext_inreg GPR:$Src, i16), (SXTH GPR:$Src, 0)>;
+
+def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i8)),
+ (SXTAB GPR:$Rn, GPRnopc:$Rm, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i16)),
+ (SXTAH GPR:$Rn, GPRnopc:$Rm, 0)>;
+
+// Atomic load/store patterns
+def : ARMPat<(atomic_load_8 ldst_so_reg:$src),
+ (LDRBrs ldst_so_reg:$src)>;
+def : ARMPat<(atomic_load_8 addrmode_imm12:$src),
+ (LDRBi12 addrmode_imm12:$src)>;
+def : ARMPat<(atomic_load_16 addrmode3:$src),
+ (LDRH addrmode3:$src)>;
+def : ARMPat<(atomic_load_32 ldst_so_reg:$src),
+ (LDRrs ldst_so_reg:$src)>;
+def : ARMPat<(atomic_load_32 addrmode_imm12:$src),
+ (LDRi12 addrmode_imm12:$src)>;
+def : ARMPat<(atomic_store_8 ldst_so_reg:$ptr, GPR:$val),
+ (STRBrs GPR:$val, ldst_so_reg:$ptr)>;
+def : ARMPat<(atomic_store_8 addrmode_imm12:$ptr, GPR:$val),
+ (STRBi12 GPR:$val, addrmode_imm12:$ptr)>;
+def : ARMPat<(atomic_store_16 addrmode3:$ptr, GPR:$val),
+ (STRH GPR:$val, addrmode3:$ptr)>;
+def : ARMPat<(atomic_store_32 ldst_so_reg:$ptr, GPR:$val),
+ (STRrs GPR:$val, ldst_so_reg:$ptr)>;
+def : ARMPat<(atomic_store_32 addrmode_imm12:$ptr, GPR:$val),
+ (STRi12 GPR:$val, addrmode_imm12:$ptr)>;
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
+
+include "ARMInstrThumb.td"
+
+//===----------------------------------------------------------------------===//
+// Thumb2 Support
+//
+
+include "ARMInstrThumb2.td"
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
+
+include "ARMInstrVFP.td"
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD (NEON) Support
+//
+
+include "ARMInstrNEON.td"
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+// Memory barriers
+def : InstAlias<"dmb", (DMB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"dsb", (DSB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"isb", (ISB 0xf)>, Requires<[IsARM, HasDB]>;
+
+// System instructions
+def : MnemonicAlias<"swi", "svc">;
+
+// Load / Store Multiple
+def : MnemonicAlias<"ldmfd", "ldm">;
+def : MnemonicAlias<"ldmia", "ldm">;
+def : MnemonicAlias<"ldmea", "ldmdb">;
+def : MnemonicAlias<"stmfd", "stmdb">;
+def : MnemonicAlias<"stmia", "stm">;
+def : MnemonicAlias<"stmea", "stm">;
+
+// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the
+// shift amount is zero (i.e., unspecified).
+def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
+ (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>,
+ Requires<[IsARM, HasV6]>;
+def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
+ (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>,
+ Requires<[IsARM, HasV6]>;
+
+// PUSH/POP aliases for STM/LDM
+def : ARMInstAlias<"push${p} $regs", (STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : ARMInstAlias<"pop${p} $regs", (LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+
+// SSAT/USAT optional shift operand.
+def : ARMInstAlias<"ssat${p} $Rd, $sat_imm, $Rn",
+ (SSAT GPRnopc:$Rd, imm1_32:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>;
+def : ARMInstAlias<"usat${p} $Rd, $sat_imm, $Rn",
+ (USAT GPRnopc:$Rd, imm0_31:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>;
+
+
+// Extend instruction optional rotate operand.
+def : ARMInstAlias<"sxtab${p} $Rd, $Rn, $Rm",
+ (SXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtah${p} $Rd, $Rn, $Rm",
+ (SXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
+ (SXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtb${p} $Rd, $Rm",
+ (SXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtb16${p} $Rd, $Rm",
+ (SXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxth${p} $Rd, $Rm",
+ (SXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+
+def : ARMInstAlias<"uxtab${p} $Rd, $Rn, $Rm",
+ (UXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtah${p} $Rd, $Rn, $Rm",
+ (UXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
+ (UXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtb${p} $Rd, $Rm",
+ (UXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtb16${p} $Rd, $Rm",
+ (UXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxth${p} $Rd, $Rm",
+ (UXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+
+
+// RFE aliases
+def : MnemonicAlias<"rfefa", "rfeda">;
+def : MnemonicAlias<"rfeea", "rfedb">;
+def : MnemonicAlias<"rfefd", "rfeia">;
+def : MnemonicAlias<"rfeed", "rfeib">;
+def : MnemonicAlias<"rfe", "rfeia">;
+
+// SRS aliases
+def : MnemonicAlias<"srsfa", "srsib">;
+def : MnemonicAlias<"srsea", "srsia">;
+def : MnemonicAlias<"srsfd", "srsdb">;
+def : MnemonicAlias<"srsed", "srsda">;
+def : MnemonicAlias<"srs", "srsia">;
+
+// QSAX == QSUBADDX
+def : MnemonicAlias<"qsubaddx", "qsax">;
+// SASX == SADDSUBX
+def : MnemonicAlias<"saddsubx", "sasx">;
+// SHASX == SHADDSUBX
+def : MnemonicAlias<"shaddsubx", "shasx">;
+// SHSAX == SHSUBADDX
+def : MnemonicAlias<"shsubaddx", "shsax">;
+// SSAX == SSUBADDX
+def : MnemonicAlias<"ssubaddx", "ssax">;
+// UASX == UADDSUBX
+def : MnemonicAlias<"uaddsubx", "uasx">;
+// UHASX == UHADDSUBX
+def : MnemonicAlias<"uhaddsubx", "uhasx">;
+// UHSAX == UHSUBADDX
+def : MnemonicAlias<"uhsubaddx", "uhsax">;
+// UQASX == UQADDSUBX
+def : MnemonicAlias<"uqaddsubx", "uqasx">;
+// UQSAX == UQSUBADDX
+def : MnemonicAlias<"uqsubaddx", "uqsax">;
+// USAX == USUBADDX
+def : MnemonicAlias<"usubaddx", "usax">;
+
+// "mov Rd, mod_imm_not" can be handled via "mvn" in assembly, just like
+// for isel.
+def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
+ (MVNi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
+ (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+// Same for AND <--> BIC
+def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
+ (ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
+ (BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+
+// Likewise, "add Rd, mod_imm_neg" -> sub
+def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (SUBri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"add${s}${p} $Rd, $imm",
+ (SUBri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via mod_imm_neg
+def : ARMInstAlias<"cmp${p} $Rd, $imm",
+ (CMNri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>;
+def : ARMInstAlias<"cmn${p} $Rd, $imm",
+ (CMPri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>;
+
+// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
+// LSR, ROR, and RRX instructions.
+// FIXME: We need C++ parser hooks to map the alias to the MOV
+// encoding. It seems we should be able to do that sort of thing
+// in tblgen, but it could get ugly.
+let TwoOperandAliasConstraint = "$Rm = $Rd" in {
+def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+ cc_out:$s)>;
+def LSRi : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+ cc_out:$s)>;
+def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+ cc_out:$s)>;
+def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+ cc_out:$s)>;
+}
+def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
+ (ins GPR:$Rd, GPR:$Rm, pred:$p, cc_out:$s)>;
+let TwoOperandAliasConstraint = "$Rn = $Rd" in {
+def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def LSRr : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+}
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
+ (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// Pre-v6, 'mov r0, r0' was used as a NOP encoding.
+def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
+ Requires<[IsARM, NoV6]>;
+
+// MUL/UMLAL/SMLAL/UMULL/SMULL are available on all arches, but
+// the instruction definitions need difference constraints pre-v6.
+// Use these aliases for the assembly parsing on pre-v6.
+def : InstAlias<"mul${s}${p} $Rd, $Rn, $Rm",
+ (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def : InstAlias<"mla${s}${p} $Rd, $Rn, $Rm, $Ra",
+ (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra,
+ pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def : InstAlias<"smlal${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def : InstAlias<"umlal${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+// 'it' blocks in ARM mode just validate the predicates. The IT itself
+// is discarded.
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
+ ComplexDeprecationPredicate<"IT">;
+
+let mayLoad = 1, mayStore =1, hasSideEffects = 1 in
+def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
+ NoItinerary,
+ [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
new file mode 100644
index 0000000..7020ffb
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -0,0 +1,8178 @@
+//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM NEON instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// NEON-specific Operands.
+//===----------------------------------------------------------------------===//
+def nModImm : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+}
+
+def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
+def nImmSplatI8 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI8AsmOperand;
+}
+def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
+def nImmSplatI16 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI16AsmOperand;
+}
+def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
+def nImmSplatI32 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI32AsmOperand;
+}
+def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
+def nImmSplatNotI16 : Operand<i32> {
+ let ParserMatchClass = nImmSplatNotI16AsmOperand;
+}
+def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
+def nImmSplatNotI32 : Operand<i32> {
+ let ParserMatchClass = nImmSplatNotI32AsmOperand;
+}
+def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
+def nImmVMOVI32 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32AsmOperand;
+}
+
+def nImmVMOVI16AsmOperandByteReplicate :
+ AsmOperandClass {
+ let Name = "NEONi16vmovByteReplicate";
+ let PredicateMethod = "isNEONi16ByteReplicate";
+ let RenderMethod = "addNEONvmovByteReplicateOperands";
+}
+def nImmVMOVI32AsmOperandByteReplicate :
+ AsmOperandClass {
+ let Name = "NEONi32vmovByteReplicate";
+ let PredicateMethod = "isNEONi32ByteReplicate";
+ let RenderMethod = "addNEONvmovByteReplicateOperands";
+}
+def nImmVMVNI16AsmOperandByteReplicate :
+ AsmOperandClass {
+ let Name = "NEONi16invByteReplicate";
+ let PredicateMethod = "isNEONi16ByteReplicate";
+ let RenderMethod = "addNEONinvByteReplicateOperands";
+}
+def nImmVMVNI32AsmOperandByteReplicate :
+ AsmOperandClass {
+ let Name = "NEONi32invByteReplicate";
+ let PredicateMethod = "isNEONi32ByteReplicate";
+ let RenderMethod = "addNEONinvByteReplicateOperands";
+}
+
+def nImmVMOVI16ByteReplicate : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI16AsmOperandByteReplicate;
+}
+def nImmVMOVI32ByteReplicate : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32AsmOperandByteReplicate;
+}
+def nImmVMVNI16ByteReplicate : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMVNI16AsmOperandByteReplicate;
+}
+def nImmVMVNI32ByteReplicate : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMVNI32AsmOperandByteReplicate;
+}
+
+def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
+def nImmVMOVI32Neg : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32NegAsmOperand;
+}
+def nImmVMOVF32 : Operand<i32> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
+def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
+def nImmSplatI64 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI64AsmOperand;
+}
+
+def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
+def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
+def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
+def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 8;
+}]> {
+ let ParserMatchClass = VectorIndex8Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 4;
+}]> {
+ let ParserMatchClass = VectorIndex16Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 2;
+}]> {
+ let ParserMatchClass = VectorIndex32Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+
+// Register list of one D register.
+def VecListOneDAsmOperand : AsmOperandClass {
+ let Name = "VecListOneD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
+ let ParserMatchClass = VecListOneDAsmOperand;
+}
+// Register list of two sequential D registers.
+def VecListDPairAsmOperand : AsmOperandClass {
+ let Name = "VecListDPair";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
+ let ParserMatchClass = VecListDPairAsmOperand;
+}
+// Register list of three sequential D registers.
+def VecListThreeDAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
+ let ParserMatchClass = VecListThreeDAsmOperand;
+}
+// Register list of four sequential D registers.
+def VecListFourDAsmOperand : AsmOperandClass {
+ let Name = "VecListFourD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
+ let ParserMatchClass = VecListFourDAsmOperand;
+}
+// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListDPairSpacedAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairSpaced";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
+ let ParserMatchClass = VecListDPairSpacedAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three Q registers).
+def VecListThreeQAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQ";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
+ let ParserMatchClass = VecListThreeQAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three Q registers).
+def VecListFourQAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQ";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
+ let ParserMatchClass = VecListFourQAsmOperand;
+}
+
+// Register list of one D register, with "all lanes" subscripting.
+def VecListOneDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
+ let ParserMatchClass = VecListOneDAllLanesAsmOperand;
+}
+// Register list of two D registers, with "all lanes" subscripting.
+def VecListDPairAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairAllLanes : RegisterOperand<DPair,
+ "printVectorListTwoAllLanes"> {
+ let ParserMatchClass = VecListDPairAllLanesAsmOperand;
+}
+// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairSpacedAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
+ "printVectorListTwoSpacedAllLanes"> {
+ let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
+}
+// Register list of three D registers, with "all lanes" subscripting.
+def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeDAllLanes : RegisterOperand<DPR,
+ "printVectorListThreeAllLanes"> {
+ let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three sequential Q regs).
+def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeQAllLanes : RegisterOperand<DPR,
+ "printVectorListThreeSpacedAllLanes"> {
+ let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
+}
+// Register list of four D registers, with "all lanes" subscripting.
+def VecListFourDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
+ let ParserMatchClass = VecListFourDAllLanesAsmOperand;
+}
+// Register list of four D registers spaced by 2 (four sequential Q regs).
+def VecListFourQAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourQAllLanes : RegisterOperand<DPR,
+ "printVectorListFourSpacedAllLanes"> {
+ let ParserMatchClass = VecListFourQAllLanesAsmOperand;
+}
+
+
+// Register list of one D register, with byte lane subscripting.
+def VecListOneDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListOneDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+// Register list of two D registers with byte lane subscripting.
+def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of two Q registers with half-word lane subscripting.
+def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+
+// Register list of three D registers with byte lane subscripting.
+def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of three Q registers with half-word lane subscripting.
+def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+// Register list of four D registers with byte lane subscripting.
+def VecListFourDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListFourDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of four Q registers with half-word lane subscripting.
+def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListFourQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 8;
+}]>;
+def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() == 4;
+}]>;
+def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() == 4;
+}]>;
+def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() == 2;
+}]>;
+def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() == 2;
+}]>;
+def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() == 1;
+}]>;
+def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() == 1;
+}]>;
+def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+
+//===----------------------------------------------------------------------===//
+// NEON-specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
+
+def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
+def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
+def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
+def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
+def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
+def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
+
+// Types for vector shift by immediates. The "SHX" version is for long and
+// narrow operations where the source and destination vectors have different
+// types. The "SHINS" version is for shift and insert operations.
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+
+def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
+def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
+def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
+def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
+
+def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
+def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
+def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
+
+def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
+def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
+def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
+def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
+def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
+def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
+
+def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
+def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
+def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
+
+def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
+def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
+
+def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
+def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
+
+def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
+def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
+
+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
+
+def NEONvbsl : SDNode<"ARMISD::VBSL",
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>>;
+
+def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
+
+// VDUPLANE can produce a quad-register result from a double-register source,
+// so the result is not constrained to match the source.
+def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisVT<2, i32>]>>;
+
+def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
+
+def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
+def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
+def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
+
+def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
+def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
+def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+
+def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
+def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 32 && EltVal == 0);
+}]>;
+
+def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 8 && EltVal == 0xff);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// NEON load / store instructions
+//===----------------------------------------------------------------------===//
+
+// Use VLDM to load a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
+def VLDMQIA
+ : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
+ IIC_fpLoad_m, "",
+ [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;
+
+// Use VSTM to store a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
+def VSTMQIA
+ : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
+ IIC_fpStore_m, "",
+ [(store (v2f64 DPair:$src), GPR:$Rn)]>;
+
+// Classes for VLD* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
+class VLDQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
+
+class VLDQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
+ "$src = $dst">;
+class VLDQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb, $src = $dst">;
+
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
+
+// VLD1 : Vector Load (multiple single elements)
+class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
+ (ins AddrMode:$Rn), IIC_VLD1,
+ "vld1", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+}
+class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
+ (ins AddrMode:$Rn), IIC_VLD1x2,
+ "vld1", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+}
+
+def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>;
+def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
+def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
+def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;
+
+def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>;
+def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
+def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
+def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
+
+// ...with address register writeback:
+multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
+ def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn), IIC_VLD1u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+ def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+}
+multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
+ def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+ def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+}
+
+defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>;
+defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
+defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
+defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
+defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
+defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
+defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
+defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
+
+// ...with 3 registers
+class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
+ (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
+ "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+}
+multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
+ def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+ def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+}
+
+def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>;
+def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
+def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
+def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;
+
+defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>;
+defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
+defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
+defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
+
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;
+
+// ...with 4 registers
+class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
+ (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
+ "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+}
+multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
+ def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+ def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+}
+
+def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
+def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
+def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
+def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
+
+defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
+defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
+defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
+defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
+
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;
+
+// VLD2 : Vector Load (multiple 2-element structures)
+class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin, Operand AddrMode>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
+ (ins AddrMode:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST2Instruction";
+}
+
+def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
+ addrmode6align64or128>;
+def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
+ addrmode6align64or128>;
+def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
+ addrmode6align64or128>;
+
+def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
+ addrmode6align64or128or256>;
+def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
+ addrmode6align64or128or256>;
+def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
+ addrmode6align64or128or256>;
+
+def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+
+// ...with address register writeback:
+multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
+ def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST2Instruction";
+ }
+ def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm), itin,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST2Instruction";
+ }
+}
+
+defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
+ addrmode6align64or128>;
+defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
+ addrmode6align64or128>;
+defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
+ addrmode6align64or128>;
+
+defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
+ addrmode6align64or128or256>;
+defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
+ addrmode6align64or128or256>;
+defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
+ addrmode6align64or128or256>;
+
+def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+
+// ...with double-spaced registers
+def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
+ addrmode6align64or128>;
+def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
+ addrmode6align64or128>;
+def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
+ addrmode6align64or128>;
+defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
+ addrmode6align64or128>;
+defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
+ addrmode6align64or128>;
+defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
+ addrmode6align64or128>;
+
+// VLD3 : Vector Load (multiple 3-element structures)
+class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD3,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST3Instruction";
+}
+
+def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
+def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
+def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
+
+// ...with address register writeback:
+class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST3Instruction";
+}
+
+def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
+def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
+def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+
+// ...with double-spaced registers:
+def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
+def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
+def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
+def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
+def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
+def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
+// VLD4 : Vector Load (multiple 4-element structures)
+class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD4,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST4Instruction";
+}
+
+def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
+def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
+def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
+
+// ...with address register writeback:
+class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST4Instruction";
+}
+
+def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
+def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
+def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+
+// ...with double-spaced registers:
+def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
+def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
+def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
+def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
+def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
+def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
+
+// Classes for VLD*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst),
+ (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst),
+ (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst),
+ (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+
+// VLD1LN : Vector Load (single element to one lane)
+class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag LoadOp>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+ (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
+ IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+ "$src = $Vd",
+ [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+ (i32 (LoadOp addrmode6:$Rn)),
+ imm:$lane))]> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD1LN";
+}
+class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag LoadOp>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+ (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
+ IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+ "$src = $Vd",
+ [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+ (i32 (LoadOp addrmode6oneL32:$Rn)),
+ imm:$lane))]> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD1LN";
+}
+class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
+ let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
+ (i32 (LoadOp addrmode6:$addr)),
+ imm:$lane))];
+}
+
+def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{5-4} = Rn{5-4};
+}
+def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
+def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
+def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
+
+def : Pat<(vector_insert (v2f32 DPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v4f32 QPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$src = $Vd, $Rn.addr = $wb", []> {
+ let DecoderMethod = "DecodeVLD1LN";
+}
+
+def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{4};
+ let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+
+// VLD2LN : Vector Load (single 2-element structure to one lane)
+class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2LN";
+}
+
+def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+
+// ...with double-spaced registers:
+def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+
+// ...with address register writeback:
+class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
+ "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2LN";
+}
+
+def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+
+// VLD3LN : Vector Load (single 3-element structure to one lane)
+class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD3LN";
+}
+
+def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+
+// ...with double-spaced registers:
+def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+
+// ...with address register writeback:
+class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VLD3lnu, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
+ []> {
+ let DecoderMethod = "DecodeVLD3LN";
+}
+
+def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+
+// VLD4LN : Vector Load (single 4-element structure to one lane)
+class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4LN";
+}
+
+def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+
+// ...with double-spaced registers:
+def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+
+// ...with address register writeback:
+class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VLD4lnu, "vld4", Dt,
+"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
+"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
+ []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4LN" ;
+}
+
+def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+
+} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
+
+// VLD1DUP : Vector Load (single element to all lanes)
+class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
+ Operand AddrMode>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
+ (ins AddrMode:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListOneDAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+}
+def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
+ addrmode6dupalignNone>;
+def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
+ addrmode6dupalign16>;
+def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
+ addrmode6dupalign32>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPd32 addrmode6:$addr)>;
+
+class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
+ Operand AddrMode>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
+ (ins AddrMode:$Rn), IIC_VLD1dup,
+ "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListDPairAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+}
+
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
+ addrmode6dupalignNone>;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
+ addrmode6dupalign16>;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
+ addrmode6dupalign32>;
+
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPq32 addrmode6:$addr)>;
+
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
+// ...with address register writeback:
+multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ }
+}
+multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ }
+}
+
+defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
+defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
+defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;
+
+defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
+defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
+defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
+
+// VLD2DUP : Vector Load (single 2-element structure to all lanes)
+class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
+ (ins AddrMode:$Rn), IIC_VLD2dup,
+ "vld2", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+}
+
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
+ addrmode6dupalign16>;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
+ addrmode6dupalign32>;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
+ addrmode6dupalign64>;
+
+// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
+// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
+// ...with double-spaced registers
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
+ addrmode6dupalign16>;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
+ addrmode6dupalign32>;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
+ addrmode6dupalign64>;
+
+// ...with address register writeback:
+multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ Operand AddrMode> {
+ def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
+ (outs VdTy:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn), IIC_VLD2dupu,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+ }
+ def _register : NLdSt<1, 0b10, 0b1101, op7_4,
+ (outs VdTy:$Vd, GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+ }
+}
+
+defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
+ addrmode6dupalign16>;
+defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
+ addrmode6dupalign32>;
+defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
+ addrmode6dupalign64>;
+
+defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
+ addrmode6dupalign16>;
+defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
+ addrmode6dupalign32>;
+defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
+ addrmode6dupalign64>;
+
+// VLD3DUP : Vector Load (single 3-element structure to all lanes)
+class VLD3DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6dup:$Rn), IIC_VLD3dup,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeVLD3DupInstruction";
+}
+
+def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
+def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
+def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
+
+def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeVLD3DupInstruction";
+}
+
+def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;
+
+def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
+def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
+def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
+
+def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+
+// VLD4DUP : Vector Load (single 4-element structure to all lanes)
+class VLD4DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6dup:$Rn), IIC_VLD4dup,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4DupInstruction";
+}
+
+def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
+def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
+def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+// ...with address register writeback:
+class VLD4DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4DupInstruction";
+}
+
+def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
+def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
+def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+
+} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
+
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
+
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
+class VSTQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
+class VSTQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+
+class VSTQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
+class VSTQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb">;
+
+// VST1 : Vector Store (multiple single elements)
+class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
+ IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+}
+class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
+ IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+}
+
+def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>;
+def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
+def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
+def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;
+
+def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>;
+def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
+def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
+def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
+
+// ...with address register writeback:
+multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
+ def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
+ (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+ def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
+ IIC_VLD1u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+}
+multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
+ def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
+ (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+ def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
+ IIC_VLD1x2u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+}
+
+defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>;
+defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
+defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
+defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;
+
+defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
+defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
+defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
+defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
+
+// ...with 3 registers
+class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
+ (ins AddrMode:$Rn, VecListThreeD:$Vd),
+ IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+}
+multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
+ def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+ def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
+ IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+}
+
+def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>;
+def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
+def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
+def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;
+
+defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>;
+defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
+defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
+defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
+
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
+
+// ...with 4 registers
+class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
+ (ins AddrMode:$Rn, VecListFourD:$Vd),
+ IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
+ []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+}
+multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
+ def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+ def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST1Instruction";
+ }
+}
+
+def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
+
+defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
+defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
+defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
+defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
+
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
+
+// VST2 : Vector Store (multiple 2-element structures)
+class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin, Operand AddrMode>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
+ itin, "vst2", Dt, "$Vd, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST2Instruction";
+}
+
+def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
+ addrmode6align64or128>;
+def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
+ addrmode6align64or128>;
+def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
+ addrmode6align64or128>;
+
+def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
+ addrmode6align64or128or256>;
+def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
+ addrmode6align64or128or256>;
+def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
+ addrmode6align64or128or256>;
+
+def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
+
+// ...with address register writeback:
+multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy, Operand AddrMode> {
+ def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST2Instruction";
+ }
+ def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST2Instruction";
+ }
+}
+multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
+ def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST2Instruction";
+ }
+ def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST2Instruction";
+ }
+}
+
+defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair,
+ addrmode6align64or128>;
+defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
+ addrmode6align64or128>;
+defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
+ addrmode6align64or128>;
+
+defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
+defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
+defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
+
+def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+
+// ...with double-spaced registers
+def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
+ addrmode6align64or128>;
+def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
+ addrmode6align64or128>;
+def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
+ addrmode6align64or128>;
+defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced,
+ addrmode6align64or128>;
+defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
+ addrmode6align64or128>;
+defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
+ addrmode6align64or128>;
+
+// VST3 : Vector Store (multiple 3-element structures)
+class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST3Instruction";
+}
+
+def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
+def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
+def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
+
+// ...with address register writeback:
+class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDST3Instruction";
+}
+
+def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
+def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
+def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+
+// ...with double-spaced registers:
+def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
+def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
+def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
+def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
+def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
+def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// VST4 : Vector Store (multiple 4-element structures)
+class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST4Instruction";
+}
+
+def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
+def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
+def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
+
+// ...with address register writeback:
+class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
+ "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDST4Instruction";
+}
+
+def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
+def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
+def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+
+// ...with double-spaced registers:
+def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
+def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
+def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
+def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
+def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
+def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
+
+// Classes for VST*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+
+// VST1LN : Vector Store (single element from one lane)
+class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
+ IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST1LN";
+}
+class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNPseudo<IIC_VST1ln> {
+ let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr)];
+}
+
+def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
+ NEONvgetlaneu, addrmode6> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
+ NEONvgetlaneu, addrmode6> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
+ addrmode6oneL32> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
+def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
+def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
+
+def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+// ...with address register writeback:
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins AdrMode:$Rn, am6offset:$Rm,
+ DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb",
+ [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+ AdrMode:$Rn, am6offset:$Rm))]> {
+ let DecoderMethod = "DecodeVST1LN";
+}
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNWBPseudo<IIC_VST1lnu> {
+ let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr, am6offset:$offset))];
+}
+
+def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+ NEONvgetlaneu, addrmode6> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+ NEONvgetlaneu, addrmode6> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+ extractelt, addrmode6oneL32> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
+
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
+
+// VST2LN : Vector Store (single 2-element structure from one lane)
+class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
+ IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST2LN";
+}
+
+def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+
+// ...with double-spaced registers:
+def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{4} = Rn{4};
+}
+
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+
+// ...with address register writeback:
+class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST2LN";
+}
+
+def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+
+def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+
+// VST3LN : Vector Store (single 3-element structure from one lane)
+class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST3LN";
+}
+
+def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+
+// ...with double-spaced registers:
+def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+
+// ...with address register writeback:
+class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VST3lnu, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let DecoderMethod = "DecodeVST3LN";
+}
+
+def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+
+def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+
+// VST4LN : Vector Store (single 4-element structure from one lane)
+class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST4LN";
+}
+
+def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+
+// ...with double-spaced registers:
+def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+
+// ...with address register writeback:
+class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VST4lnu, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST4LN";
+}
+
+def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+
+def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+
+} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
+
+// Use vld1/vst1 for unaligned f64 load / store
+def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
+ (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
+ (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
+ (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
+ (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
+ (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
+def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
+ (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
+
+// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
+// load / store if it's legal.
+def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
+ (VLD1q64 addrmode6:$addr)>;
+def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q64 addrmode6:$addr, QPR:$value)>;
+def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
+ (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
+ (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
+ (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+
+//===----------------------------------------------------------------------===//
+// NEON pattern fragments
+//===----------------------------------------------------------------------===//
+
+// Extract D sub-registers of Q registers.
+def DSubReg_i8_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N),
+ MVT::i32);
+}]>;
+def DSubReg_i16_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N),
+ MVT::i32);
+}]>;
+def DSubReg_i32_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N),
+ MVT::i32);
+}]>;
+def DSubReg_f64_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N),
+ MVT::i32);
+}]>;
+
+// Extract S sub-registers of Q/D registers.
+def SSubReg_f32_reg : SDNodeXForm<imm, [{
+ assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N),
+ MVT::i32);
+}]>;
+
+// Translate lane numbers from Q registers to D subregs.
+def SubReg_i8_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32);
+}]>;
+def SubReg_i16_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32);
+}]>;
+def SubReg_i32_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes
+//===----------------------------------------------------------------------===//
+
+// Basic 2-register operations: double- and quad-register.
+class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
+class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
+
+// Basic 2-register intrinsics, both double- and quad-register.
+class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Same as above, but not predicated.
+class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ itin, OpcodeStr, Dt,
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+
+class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+ itin, OpcodeStr, Dt,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
+class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+ bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
+ itin, OpcodeStr, Dt,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Same as N2VQIntXnp but with Vd as a src register.
+class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+ bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
+ (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
+ itin, OpcodeStr, Dt,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
+ let Constraints = "$src = $Vd";
+}
+
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
+
+// Narrow 2-register intrinsics.
+class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
+
+// Long 2-register operations (currently only used for VMOVL).
+class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
+
+// Long 2-register intrinsics.
+class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
+
+// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
+ (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
+ OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
+ InstrItinClass itin, string OpcodeStr, string Dt>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
+ (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+
+// Basic 3-register operations: double- and quad-register.
+class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+// Same as N3VD but no data type.
+class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+
+class N3VDSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = 0;
+}
+class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = 0;
+}
+
+class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+class N3VQSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = 0;
+}
+class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = 0;
+}
+
+// Basic 3-register intrinsics, both double- and quad-register.
+class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+
+class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, Format f, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+
+class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+
+class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
+ let TwoOperandAliasConstraint = "$Vm = $Vd";
+ let isCommutable = 0;
+}
+
+class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+
+class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, Format f, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
+
+// Same as N3VQIntnp but with Vd as a src register.
+class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, Format f, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
+ f, itin, OpcodeStr, Dt,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
+ (OpTy QPR:$Vm))))]> {
+ let Constraints = "$src = $Vd";
+}
+
+class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
+ let TwoOperandAliasConstraint = "$Vm = $Vd";
+ let isCommutable = 0;
+}
+
+// Multiply-Add/Sub operations: double- and quad-register.
+class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
+
+class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ imm:$lane)))))))]>;
+class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_8:$Vm),
+ imm:$lane)))))))]>;
+
+class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
+ SDPatternOperator MulOp, SDPatternOperator OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
+class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))))]>;
+class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
+ SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))))]>;
+
+// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
+class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
+class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
+
+// Neon 3-argument intrinsics, both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
+ (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
+ (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
+
+// Long Multiply-Add/Sub operations.
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))]>;
+class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
+ imm:$lane))))))]>;
+class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm),
+ imm:$lane))))))]>;
+
+// Long Intrinsic-Op vector operations with explicit extend (VABAL).
+class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
+ SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))))]>;
+
+// Neon Long 3-argument intrinsic. The destination register is
+// a quad-register and is also used as the first source operand register.
+class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
+class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]>;
+class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]>;
+
+// Narrowing 3-register intrinsics.
+class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register operations.
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+
+class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
+class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
+
+// Long 3-register operations with explicitly extended operands.
+class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics with explicit extend (VABDL).
+class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm))))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics.
+class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+
+// Same as above, but not predicated.
+class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+
+class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]>;
+class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]>;
+
+// Wide 3-register operations.
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
+ SDNode OpNode, SDNode ExtOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
+ let isCommutable = Commutable;
+}
+
+// Pairwise long 2-register intrinsics, both double- and quad-register.
+class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Pairwise long 2-register accumulate intrinsics,
+// both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
+class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
+
+// Shift by immediate,
+// both double- and quad-register.
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
+class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, InstrItinClass itin, Operand ImmTy,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
+class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, InstrItinClass itin, Operand ImmTy,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
+}
+
+// Long shift by immediate.
+class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Operand ImmTy,
+ SDPatternOperator OpNode>
+ : N2VImm<op24, op23, op11_8, op7, op6, op4,
+ (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
+
+// Narrow shift by immediate.
+class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Operand ImmTy,
+ SDPatternOperator OpNode>
+ : N2VImm<op24, op23, op11_8, op7, op6, op4,
+ (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
+ (i32 ImmTy:$SIMM))))]>;
+
+// Shift right by immediate and accumulate,
+// both double- and quad-register.
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
+class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (add DPR:$src1,
+ (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
+class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (add QPR:$src1,
+ (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
+}
+
+// Shift by immediate and insert,
+// both double- and quad-register.
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
+class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, Format f, string OpcodeStr, string Dt,
+ ValueType Ty,SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
+class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, Format f, string OpcodeStr, string Dt,
+ ValueType Ty,SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
+}
+
+// Convert, with fractional bits immediate,
+// both double- and quad-register.
+class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
+class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
+
+//===----------------------------------------------------------------------===//
+// Multiclasses
+//===----------------------------------------------------------------------===//
+
+// Abbreviations used in multiclass suffixes:
+// Q = quarter int (8 bit) elements
+// H = half int (16 bit) elements
+// S = single int (32 bit) elements
+// D = double int (64 bit) elements
+
+// Neon 2-register vector operations and intrinsics.
+
+// Neon 2-register comparisons.
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4, string opc, string Dt,
+ string asm, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
+ def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
+ def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
+ def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+ def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, "f16", asm, "",
+ [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
+ Requires<[HasNEON,HasFullFP16]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+
+ // 128-bit vector types.
+ def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
+ def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
+ def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
+ def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+ def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, "f16", asm, "",
+ [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
+ Requires<[HasNEON,HasFullFP16]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+}
+
+
+// Neon 2-register vector intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
+ def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
+ def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
+ def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
+}
+
+
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, OpNode>;
+ def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, OpNode>;
+ def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, OpNode>;
+}
+
+// Neon Narrowing 2-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDPatternOperator IntOp> {
+ def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp>;
+ def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp>;
+ def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp>;
+}
+
+
+// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+ def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+
+// Neon 3-register vector operations.
+
+// First with only element sizes of 8, 16 and 32 bits:
+multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, OpNode, Commutable>;
+ def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, OpNode, Commutable>;
+ def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, OpNode, Commutable>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, OpNode, Commutable>;
+ def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, OpNode, Commutable>;
+ def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, OpNode, Commutable>;
+}
+
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
+ v4i32, v2i32, ShOp>;
+}
+
+// ....then also with element size 64 bits:
+multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0>
+ : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
+ OpcodeStr, Dt, OpNode, Commutable> {
+ def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, OpNode, Commutable>;
+ def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, OpNode, Commutable>;
+}
+
+
+// Neon 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp, Commutable>;
+ def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp, Commutable>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp, Commutable>;
+ def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp, Commutable>;
+}
+multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp> {
+ // 64-bit vector types.
+ def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp>;
+}
+
+multiclass N3VIntSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
+ def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
+ def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
+ def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, bit Commutable = 0>
+ : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp, Commutable> {
+ def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp, Commutable>;
+ def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp, Commutable>;
+}
+multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp>
+ : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp>;
+ def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp>;
+}
+
+
+// ....then also with element size of 64 bits:
+multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, bit Commutable = 0>
+ : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp, Commutable> {
+ def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp, Commutable>;
+ def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp, Commutable>;
+}
+multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp>
+ : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp>;
+ def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp>;
+}
+
+// Neon Narrowing 3-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, bit Commutable = 0> {
+ def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp, Commutable>;
+ def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp, Commutable>;
+ def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp, Commutable>;
+}
+
+
+// Neon Long 3-register vector operations.
+
+multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, Commutable>;
+ def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, Commutable>;
+ def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, Commutable>;
+}
+
+multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
+// Neon Long 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, bit Commutable = 0> {
+ def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, Commutable>;
+ def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, Commutable>;
+}
+
+multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDPatternOperator IntOp> {
+ def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, bit Commutable = 0>
+ : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
+ IntOp, Commutable> {
+ def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, Commutable>;
+}
+
+// ....with explicit extend (VABDL).
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, ExtOp, Commutable>;
+ def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, ExtOp, Commutable>;
+ def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, ExtOp, Commutable>;
+}
+
+
+// Neon Wide 3-register vector intrinsics,
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
+
+// Neon Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
+ def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
+ def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
+ def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
+ def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
+}
+
+multiclass N3VMulOpSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDPatternOperator ShOp> {
+ def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
+ def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
+ def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
+ mul, ShOp>;
+ def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
+ mul, ShOp>;
+}
+
+// Neon Intrinsic-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
+ def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
+ def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
+ def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
+ def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
+}
+
+// Neon 3-argument intrinsics,
+// element sizes of 16 and 32 bits:
+multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ // 64-bit vector types.
+ def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
+}
+
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp>
+ :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
+ itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
+ // 64-bit vector types.
+ def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ // 128-bit vector types.
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+}
+
+// Neon Long Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDNode MulOp,
+ SDNode OpNode> {
+ def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
+ def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
+ string Dt, SDNode MulOp, SDNode OpNode> {
+ def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
+ !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+
+// Neon Long 3-argument intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
+ def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
+ OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp>
+ : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
+ def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
+}
+
+// ....with explicit extend (VABAL).
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
+ def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
+ IntOp, ExtOp, OpNode>;
+ def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
+ IntOp, ExtOp, OpNode>;
+ def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
+ IntOp, ExtOp, OpNode>;
+}
+
+
+// Neon Pairwise long 2-register intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
+ def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
+ def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
+ def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
+ def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
+}
+
+
+// Neon Pairwise long 2-register accumulate intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
+ def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
+ def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
+ def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
+ def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
+}
+
+
+// Neon 2-register vector shift by immediate,
+// with f of either N2RegVShLFrm or N2RegVShRFrm
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+ // imm6 = xxxxxx
+}
+multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ string baseOpc, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift-Accumulate vector operations,
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, SDNode ShOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift-Insert vector operations,
+// with f of either N2RegVShLFrm or N2RegVShRFrm
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
+ // imm6 = xxxxxx
+}
+multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+ N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+ N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+ N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+ N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+ N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+ N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+ N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+ N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift Long operations,
+// element sizes of 8, 16, 32 bits:
+multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+ bit op4, string OpcodeStr, string Dt,
+ SDPatternOperator OpNode> {
+ def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+}
+
+// Neon Shift Narrow operations,
+// element sizes of 16, 32, 64 bits:
+multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+ bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
+ SDPatternOperator OpNode> {
+ def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, shr_imm8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, shr_imm16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, shr_imm32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+//===----------------------------------------------------------------------===//
+
+// Vector Add Operations.
+
+// VADD : Vector Add (integer and floating-point)
+defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
+ add, 1>;
+def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
+ v2f32, v2f32, fadd, 1>;
+def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
+ v4f32, v4f32, fadd, 1>;
+def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
+ v4f16, v4f16, fadd, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
+ v8f16, v8f16, fadd, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
+// VADDL : Vector Add Long (Q = D + D)
+defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "s", add, sext, 1>;
+defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "u", add, zext, 1>;
+// VADDW : Vector Add Wide (Q = Q + D)
+defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
+// VHADD : Vector Halving Add
+defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "s", int_arm_neon_vhadds, 1>;
+defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "u", int_arm_neon_vhaddu, 1>;
+// VRHADD : Vector Rounding Halving Add
+defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "s", int_arm_neon_vrhadds, 1>;
+defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
+// VQADD : Vector Saturating Add
+defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "s", int_arm_neon_vqadds, 1>;
+defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "u", int_arm_neon_vqaddu, 1>;
+// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
+defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
+// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
+defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
+ int_arm_neon_vraddhn, 1>;
+
+def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+ (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+ (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+ (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
+
+// Vector Multiply Operations.
+
+// VMUL : Vector Multiply (integer, polynomial and floating-point)
+defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
+def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
+ "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
+def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
+ "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
+def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
+ v2f32, v2f32, fmul, 1>;
+def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
+ v4f32, v4f32, fmul, 1>;
+def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
+ v4f16, v4f16, fmul, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
+ v8f16, v8f16, fmul, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
+def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
+def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
+ v2f32, fmul>;
+def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
+ v4f16, fmul>,
+ Requires<[HasNEON,HasFullFP16]>;
+
+def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
+ (v4f32 (VMULslfq (v4f32 QPR:$src1),
+ (v2f32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+
+def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+ (VMULslfd DPR:$Rn,
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
+ (i32 0))>;
+def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+ (VMULslfq QPR:$Rn,
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
+ (i32 0))>;
+
+
+// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
+defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
+defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh", "s", int_arm_neon_vqdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
+ (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
+ (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
+defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
+ IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
+defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
+ (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
+ (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+ DecoderNamespace = "NEONData" in {
+ defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+ defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
+ def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+ v8i16, v8i8, int_arm_neon_vmullp, 1>;
+ def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
+ "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
+ Requires<[HasV8, HasCrypto]>;
+}
+defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
+defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
+
+// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
+defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vqdmull", "s", int_arm_neon_vqdmull, 1>;
+defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
+ "vqdmull", "s", int_arm_neon_vqdmull>;
+
+// Vector Multiply-Accumulate and Multiply-Subtract Operations.
+
+// VMLA : Vector Multiply Accumulate (integer and floating-point)
+defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
+ v4f16, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
+ v8f16, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
+ v4f32, v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
+ v4f16, fmul, fadd>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
+ v8f16, v4f16, fmul, fadd>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+
+def : Pat<(v8i16 (add (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (add (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLAslfq (v4f32 QPR:$src1),
+ (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
+defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "s", NEONvmulls, add>;
+defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "u", NEONvmullu, add>;
+
+defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
+defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
+
+let Predicates = [HasNEON, HasV8_1a] in {
+ // v8.1a Neon Rounding Double Multiply-Op vector operations,
+ // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long
+ // (Q += D * D)
+ defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqadds
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v2i32 (int_arm_neon_vqadds
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v8i16 (int_arm_neon_vqadds
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
+ (v8i16 QPR:$Vm))))),
+ (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+ def : Pat<(v4i32 (int_arm_neon_vqadds
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
+ (v4i32 QPR:$Vm))))),
+ (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+
+ defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqadds
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh
+ (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
+ imm:$lane))>;
+ def : Pat<(v2i32 (int_arm_neon_vqadds
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh
+ (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+ imm:$lane))>;
+ def : Pat<(v8i16 (int_arm_neon_vqadds
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh
+ (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+ imm:$lane)))))),
+ (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
+ (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+ def : Pat<(v4i32 (int_arm_neon_vqadds
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh
+ (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+ imm:$lane)))))),
+ (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
+ (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+ // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long
+ // (Q -= D * D)
+ defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqsubs
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v2i32 (int_arm_neon_vqsubs
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v8i16 (int_arm_neon_vqsubs
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
+ (v8i16 QPR:$Vm))))),
+ (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+ def : Pat<(v4i32 (int_arm_neon_vqsubs
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
+ (v4i32 QPR:$Vm))))),
+ (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+
+ defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqsubs
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh
+ (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
+ def : Pat<(v2i32 (int_arm_neon_vqsubs
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh
+ (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+ imm:$lane))>;
+ def : Pat<(v8i16 (int_arm_neon_vqsubs
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh
+ (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+ imm:$lane)))))),
+ (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
+ (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+ def : Pat<(v4i32 (int_arm_neon_vqsubs
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh
+ (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+ imm:$lane)))))),
+ (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
+ (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+}
+// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
+defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlal", "s", null_frag>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
+
+def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
+def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
+
+// VMLS : Vector Multiply Subtract (integer and floating-point)
+defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
+ v4f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
+ v8f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
+ v4f32, v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
+ v4f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
+ v8f16, v4f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+
+def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
+defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "u", NEONvmullu, sub>;
+
+defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
+
+// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
+defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlsl", "s", null_frag>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
+
+def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
+def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
+
+// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
+def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+
+def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
+ v4f16, fmul, fadd>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+
+def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
+ v8f16, fmul, fadd>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+
+// Fused Vector Multiply Subtract (floating-point)
+def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
+ v4f16, fmul, fsub>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
+ v8f16, fmul, fsub>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
+ (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
+ (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
+ (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
+ (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasVFP4]>;
+
+// Vector Subtract Operations.
+
+// VSUB : Vector Subtract (integer and floating-point)
+defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
+ "vsub", "i", sub, 0>;
+def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
+ v2f32, v2f32, fsub, 0>;
+def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
+ v4f32, v4f32, fsub, 0>;
+def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
+ v4f16, v4f16, fsub, 0>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
+ v8f16, v8f16, fsub, 0>,
+ Requires<[HasNEON,HasFullFP16]>;
+// VSUBL : Vector Subtract Long (Q = D - D)
+defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "s", sub, sext, 0>;
+defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "u", sub, zext, 0>;
+// VSUBW : Vector Subtract Wide (Q = Q - D)
+defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
+// VHSUB : Vector Halving Subtract
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vhsub", "s", int_arm_neon_vhsubs, 0>;
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vhsub", "u", int_arm_neon_vhsubu, 0>;
+// VQSUB : Vector Saturing Subtract
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vqsub", "s", int_arm_neon_vqsubs, 0>;
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vqsub", "u", int_arm_neon_vqsubu, 0>;
+// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
+// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
+defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
+ int_arm_neon_vrsubhn, 0>;
+
+def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+ (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+ (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+ (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
+
+// Vector Comparisons.
+
+// VCEQ : Vector Compare Equal
+defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
+def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
+ NEONvceq, 1>;
+def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
+ NEONvceq, 1>;
+def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
+ NEONvceq, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
+ NEONvceq, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+let TwoOperandAliasConstraint = "$Vm = $Vd" in
+defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
+ "$Vd, $Vm, #0", NEONvceqz>;
+
+// VCGE : Vector Compare Greater Than or Equal
+defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
+defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
+def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
+ NEONvcge, 0>;
+def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
+ NEONvcge, 0>;
+def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
+ NEONvcge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
+ NEONvcge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
+defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
+ "$Vd, $Vm, #0", NEONvcgez>;
+defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
+ "$Vd, $Vm, #0", NEONvclez>;
+}
+
+// VCGT : Vector Compare Greater Than
+defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
+defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
+def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
+ NEONvcgt, 0>;
+def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
+ NEONvcgt, 0>;
+def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
+ NEONvcgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
+ NEONvcgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
+defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
+ "$Vd, $Vm, #0", NEONvcgtz>;
+defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
+ "$Vd, $Vm, #0", NEONvcltz>;
+}
+
+// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
+def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+ "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
+def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+ "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
+def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+ "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+ "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
+def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+ "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
+def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+ "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
+def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+ "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+ "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+// VTST : Vector Test Bits
+defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
+
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+ (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+ (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+ (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+ (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+ (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+ (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+ (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+ (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+}
+
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+ (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+ (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+ (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+ (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+ (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+ (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+ (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+ (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+}
+
+// Vector Bitwise Operations.
+
+def vnotd : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
+def vnotq : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
+
+
+// VAND : Vector Bitwise AND
+def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
+ v2i32, v2i32, and, 1>;
+def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
+ v4i32, v4i32, and, 1>;
+
+// VEOR : Vector Bitwise Exclusive OR
+def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
+ v2i32, v2i32, xor, 1>;
+def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
+ v4i32, v4i32, xor, 1>;
+
+// VORR : Vector Bitwise OR
+def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
+ v2i32, v2i32, or, 1>;
+def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
+ v4i32, v4i32, or, 1>;
+
+def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+
+// VBIC : Vector Bitwise Bit Clear (AND NOT)
+let TwoOperandAliasConstraint = "$Vn = $Vd" in {
+def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+}
+
+def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+// VORN : Vector Bitwise OR NOT
+def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+
+// VMVN : Vector Bitwise NOT (Immediate)
+
+let isReMaterializable = 1 in {
+
+def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+}
+
+// VMVN : Vector Bitwise NOT
+def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
+def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
+def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
+def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
+
+// VBSL : Vector Bitwise Select
+def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VCNTiD,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
+ (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
+ (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
+ (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
+ (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
+ (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+
+def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+
+def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+
+def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VCNTiQ,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+
+def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
+ (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
+ (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
+ (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
+ (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
+ (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+
+def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+
+// VBIF : Vector Bitwise Insert if False
+// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
+def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ []>;
+def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ []>;
+
+// VBIT : Vector Bitwise Insert if True
+// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
+def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ []>;
+def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ []>;
+
+// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
+// for equivalent operations with different register constraints; it just
+// inserts copies.
+
+// Vector Absolute Differences.
+
+// VABD : Vector Absolute Difference
+defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vabd", "s", int_arm_neon_vabds, 1>;
+defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vabd", "u", int_arm_neon_vabdu, 1>;
+def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
+def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
+def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
+ "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+ "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+// VABDL : Vector Absolute Difference Long (Q = | D - D |)
+defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
+defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
+
+def abd_shr :
+ PatFrag<(ops node:$in1, node:$in2, node:$shift),
+ (NEONvshrs (sub (zext node:$in1),
+ (zext node:$in2)), (i32 $shift))>;
+
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15)))),
+ (v4i32 (bitconvert (v8i16 (add (sub (zext (v8i8 DPR:$opA)),
+ (zext (v8i8 DPR:$opB))),
+ (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15))))))),
+ (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
+
+def : Pat<(xor (v4i32 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)),
+ (v4i32 (add (sub (zext (v4i16 DPR:$opA)),
+ (zext (v4i16 DPR:$opB))),
+ (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)))),
+ (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
+
+def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
+ (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
+ (zext (v2i32 DPR:$opB))),
+ (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
+ (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
+
+// VABA : Vector Absolute Difference and Accumulate
+defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "s", int_arm_neon_vabds, add>;
+defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "u", int_arm_neon_vabdu, add>;
+
+// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
+defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
+defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
+
+// Vector Maximum and Minimum.
+
+// VMAX : Vector Maximum
+defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmax", "s", smax, 1>;
+defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmax", "u", umax, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmax", "f32",
+ v2f32, v2f32, fmaxnan, 1>;
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmax", "f32",
+ v4f32, v4f32, fmaxnan, 1>;
+def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmax", "f16",
+ v4f16, v4f16, fmaxnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmax", "f16",
+ v8f16, v8f16, fmaxnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+// VMAXNM
+let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+ def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f32",
+ v2f32, v2f32, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON]>;
+ def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f32",
+ v4f32, v4f32, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON]>;
+ def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f16",
+ v4f16, v4f16, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f16",
+ v8f16, v8f16, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+}
+
+// VMIN : Vector Minimum
+defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmin", "s", smin, 1>;
+defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmin", "u", umin, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmin", "f32",
+ v2f32, v2f32, fminnan, 1>;
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmin", "f32",
+ v4f32, v4f32, fminnan, 1>;
+def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmin", "f16",
+ v4f16, v4f16, fminnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmin", "f16",
+ v8f16, v8f16, fminnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+// VMINNM
+let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+ def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f32",
+ v2f32, v2f32, fminnum, 1>,
+ Requires<[HasV8, HasNEON]>;
+ def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f32",
+ v4f32, v4f32, fminnum, 1>,
+ Requires<[HasV8, HasNEON]>;
+ def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f16",
+ v4f16, v4f16, fminnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f16",
+ v8f16, v8f16, fminnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+}
+
+// Vector Pairwise Operations.
+
+// VPADD : Vector Pairwise Add
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i8",
+ v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i16",
+ v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i32",
+ v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
+ IIC_VPBIND, "vpadd", "f32",
+ v2f32, v2f32, int_arm_neon_vpadd, 0>;
+def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
+ IIC_VPBIND, "vpadd", "f16",
+ v4f16, v4f16, int_arm_neon_vpadd, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+// VPADDL : Vector Pairwise Add Long
+defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
+ int_arm_neon_vpaddls>;
+defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
+ int_arm_neon_vpaddlu>;
+
+// VPADAL : Vector Pairwise Add and Accumulate Long
+defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
+ int_arm_neon_vpadals>;
+defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
+ int_arm_neon_vpadalu>;
+
+// VPMAX : Vector Pairwise Maximum
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+ "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+def VPMAXh : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+ "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+// VPMIN : Vector Pairwise Minimum
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+ "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+def VPMINh : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+ "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
+
+// VRECPE : Vector Reciprocal Estimate
+def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAD, "vrecpe", "u32",
+ v2i32, v2i32, int_arm_neon_vrecpe>;
+def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAQ, "vrecpe", "u32",
+ v4i32, v4i32, int_arm_neon_vrecpe>;
+def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAD, "vrecpe", "f32",
+ v2f32, v2f32, int_arm_neon_vrecpe>;
+def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAQ, "vrecpe", "f32",
+ v4f32, v4f32, int_arm_neon_vrecpe>;
+def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+ IIC_VUNAD, "vrecpe", "f16",
+ v4f16, v4f16, int_arm_neon_vrecpe>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+ IIC_VUNAQ, "vrecpe", "f16",
+ v8f16, v8f16, int_arm_neon_vrecpe>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+// VRECPS : Vector Reciprocal Step
+def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrecps", "f32",
+ v2f32, v2f32, int_arm_neon_vrecps, 1>;
+def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrecps", "f32",
+ v4f32, v4f32, int_arm_neon_vrecps, 1>;
+def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrecps", "f16",
+ v4f16, v4f16, int_arm_neon_vrecps, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrecps", "f16",
+ v8f16, v8f16, int_arm_neon_vrecps, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+// VRSQRTE : Vector Reciprocal Square Root Estimate
+def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAD, "vrsqrte", "u32",
+ v2i32, v2i32, int_arm_neon_vrsqrte>;
+def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAQ, "vrsqrte", "u32",
+ v4i32, v4i32, int_arm_neon_vrsqrte>;
+def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAD, "vrsqrte", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrte>;
+def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAQ, "vrsqrte", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrte>;
+def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+ IIC_VUNAD, "vrsqrte", "f16",
+ v4f16, v4f16, int_arm_neon_vrsqrte>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+ IIC_VUNAQ, "vrsqrte", "f16",
+ v8f16, v8f16, int_arm_neon_vrsqrte>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+// VRSQRTS : Vector Reciprocal Square Root Step
+def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrsqrts", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrsqrts", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrsqrts", "f16",
+ v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrsqrts", "f16",
+ v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+// Vector Shifts.
+
+// VSHL : Vector Shift
+defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "s", int_arm_neon_vshifts>;
+defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "u", int_arm_neon_vshiftu>;
+
+// VSHL : Vector Shift Left (Immediate)
+defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+
+// VSHR : Vector Shift Right (Immediate)
+defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
+ NEONvshrs>;
+defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
+ NEONvshru>;
+
+// VSHLL : Vector Shift Left Long
+defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
+ PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>;
+defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
+ PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>;
+
+// VSHLL : Vector Shift Left Long (with maximum shift count)
+class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
+ bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
+ ValueType OpTy, Operand ImmTy>
+ : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
+ ResTy, OpTy, ImmTy, null_frag> {
+ let Inst{21-16} = op21_16;
+ let DecoderMethod = "DecodeVSHLMaxInstruction";
+}
+def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
+ v8i16, v8i8, imm8>;
+def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
+ v4i32, v4i16, imm16>;
+def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
+ v2i64, v2i32, imm32>;
+
+def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
+ (VSHLLi8 DPR:$Rn, 8)>;
+def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
+ (VSHLLi16 DPR:$Rn, 16)>;
+def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
+ (VSHLLi32 DPR:$Rn, 32)>;
+def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
+ (VSHLLi8 DPR:$Rn, 8)>;
+def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
+ (VSHLLi16 DPR:$Rn, 16)>;
+def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
+ (VSHLLi32 DPR:$Rn, 32)>;
+
+// VSHRN : Vector Shift Right and Narrow
+defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
+ PatFrag<(ops node:$Rn, node:$amt),
+ (trunc (NEONvshrs node:$Rn, node:$amt))>>;
+
+def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
+ (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
+def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
+ (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
+def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
+ (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
+
+// VRSHL : Vector Rounding Shift
+defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "s", int_arm_neon_vrshifts>;
+defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "u", int_arm_neon_vrshiftu>;
+// VRSHR : Vector Rounding Shift Right
+defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
+ NEONvrshrs>;
+defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
+ NEONvrshru>;
+
+// VRSHRN : Vector Rounding Shift Right and Narrow
+defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
+ NEONvrshrn>;
+
+// VQSHL : Vector Saturating Shift
+defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "s", int_arm_neon_vqshifts>;
+defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "u", int_arm_neon_vqshiftu>;
+// VQSHL : Vector Saturating Shift Left (Immediate)
+defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
+defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
+
+// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
+defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
+
+// VQSHRN : Vector Saturating Shift Right and Narrow
+defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
+ NEONvqshrns>;
+defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
+ NEONvqshrnu>;
+
+// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
+defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
+ NEONvqshrnsu>;
+
+// VQRSHL : Vector Saturating Rounding Shift
+defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "s", int_arm_neon_vqrshifts>;
+defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "u", int_arm_neon_vqrshiftu>;
+
+// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
+defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
+ NEONvqrshrns>;
+defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
+ NEONvqrshrnu>;
+
+// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
+defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
+ NEONvqrshrnsu>;
+
+// VSRA : Vector Shift Right and Accumulate
+defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
+defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
+// VRSRA : Vector Rounding Shift Right and Accumulate
+defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
+defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
+
+// VSLI : Vector Shift Left and Insert
+defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
+
+// VSRI : Vector Shift Right and Insert
+defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
+
+// Vector Absolute and Saturating Absolute.
+
+// VABS : Vector Absolute Value
+defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
+ IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
+ int_arm_neon_vabs>;
+def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v2f32, v2f32, fabs>;
+def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v4f32, v4f32, fabs>;
+def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+ "vabs", "f16",
+ v4f16, v4f16, fabs>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+ "vabs", "f16",
+ v8f16, v8f16, fabs>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
+ (v2i32 (bitconvert (v8i8 (add DPR:$src,
+ (NEONvshrs DPR:$src, (i32 7))))))),
+ (VABSv8i8 DPR:$src)>;
+def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
+ (v2i32 (bitconvert (v4i16 (add DPR:$src,
+ (NEONvshrs DPR:$src, (i32 15))))))),
+ (VABSv4i16 DPR:$src)>;
+def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
+ (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
+ (VABSv2i32 DPR:$src)>;
+def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
+ (v4i32 (bitconvert (v16i8 (add QPR:$src,
+ (NEONvshrs QPR:$src, (i32 7))))))),
+ (VABSv16i8 QPR:$src)>;
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
+ (v4i32 (bitconvert (v8i16 (add QPR:$src,
+ (NEONvshrs QPR:$src, (i32 15))))))),
+ (VABSv8i16 QPR:$src)>;
+def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
+ (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
+ (VABSv4i32 QPR:$src)>;
+
+// VQABS : Vector Saturating Absolute Value
+defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
+ int_arm_neon_vqabs>;
+
+// Vector Negate.
+
+def vnegd : PatFrag<(ops node:$in),
+ (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
+def vnegq : PatFrag<(ops node:$in),
+ (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
+
+class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
+class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
+ IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
+
+// VNEG : Vector Negate (integer)
+def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
+def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
+def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
+def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
+def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
+def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
+
+// VNEG : Vector Negate (floating-point)
+def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
+def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+ "vneg", "f16", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+ "vneg", "f16", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
+def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
+def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
+def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
+def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
+def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
+
+// VQNEG : Vector Saturating Negate
+defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
+ int_arm_neon_vqneg>;
+
+// Vector Bit Counting Operations.
+
+// VCLS : Vector Count Leading Sign Bits
+defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
+ int_arm_neon_vcls>;
+// VCLZ : Vector Count Leading Zeros
+defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
+ ctlz>;
+// VCNT : Vector Count One Bits
+def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiD, "vcnt", "8",
+ v8i8, v8i8, ctpop>;
+def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiQ, "vcnt", "8",
+ v16i8, v16i8, ctpop>;
+
+// Vector Swap
+def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
+ (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
+ NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
+ []>;
+def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
+ (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
+ NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
+ []>;
+
+// Vector Move Operations.
+
+// VMOV : Vector Move (Register)
+def : NEONInstAlias<"vmov${p} $Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p} $Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+// VMOV : Vector Move (Immediate)
+
+let isReMaterializable = 1 in {
+def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+ "vmov", "f32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
+def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+ "vmov", "f32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
+} // isReMaterializable
+
+// Add support for bytes replication feature, so it could be GAS compatible.
+// E.g. instructions below:
+// "vmov.i32 d0, 0xffffffff"
+// "vmov.i32 d0, 0xabababab"
+// "vmov.i16 d0, 0xabab"
+// are incorrect, but we could deal with such cases.
+// For last two instructions, for example, it should emit:
+// "vmov.i8 d0, 0xab"
+def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
+ (VMOVv8i8 DPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
+ (VMOVv8i8 DPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
+ (VMOVv16i8 QPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
+ (VMOVv16i8 QPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;
+
+// Also add same support for VMVN instructions. So instruction:
+// "vmvn.i32 d0, 0xabababab"
+// actually means:
+// "vmov.i8 d0, 0x54"
+def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
+ (VMOVv8i8 DPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
+ (VMOVv8i8 DPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
+ (VMOVv16i8 QPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
+ (VMOVv16i8 QPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;
+
+// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
+// require zero cycles to execute so they should be used wherever possible for
+// setting a register to zero.
+
+// Even without these pseudo-insts we would probably end up with the correct
+// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
+// since they are sometimes rather expensive (in general).
+
+let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
+ def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
+ [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
+ (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
+ Requires<[HasZCZ]>;
+ def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
+ [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
+ (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
+ Requires<[HasZCZ]>;
+}
+
+// VMOV : Vector Get Lane (move scalar to ARM core register)
+
+def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
+ (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
+ IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
+ [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
+ (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
+ IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
+ [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
+ (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
+ IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
+ [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
+ (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
+ IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
+ [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
+ (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
+ IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
+ [(set GPR:$R, (extractelt (v2i32 DPR:$V),
+ imm:$lane))]>,
+ Requires<[HasVFP2, HasFastVGETLNi32]> {
+ let Inst{21} = lane{0};
+}
+// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
+def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
+ (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane))>;
+def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
+ (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
+ (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane))>;
+def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
+ (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+ (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane))>,
+ Requires<[HasNEON, HasFastVGETLNi32]>;
+def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
+ Requires<[HasNEON, HasSlowVGETLNi32]>;
+def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
+ Requires<[HasNEON, HasSlowVGETLNi32]>;
+def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
+ (SSubReg_f32_reg imm:$src2))>;
+def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
+ (SSubReg_f32_reg imm:$src2))>;
+//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
+// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+
+
+// VMOV : Vector Set Lane (move ARM core register to scalar)
+
+let Constraints = "$src1 = $V" in {
+def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
+ IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
+ [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
+ IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
+ [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
+ IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
+ [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
+ GPR:$R, imm:$lane))]>,
+ Requires<[HasVFP2]> {
+ let Inst{21} = lane{0};
+ // This instruction is equivalent as
+ // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
+ let isInsertSubreg = 1;
+}
+}
+def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
+ (v16i8 (INSERT_SUBREG QPR:$src1,
+ (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i8_reg imm:$lane))),
+ GPR:$src2, (SubReg_i8_lane imm:$lane))),
+ (DSubReg_i8_reg imm:$lane)))>;
+def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
+ (v8i16 (INSERT_SUBREG QPR:$src1,
+ (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i16_reg imm:$lane))),
+ GPR:$src2, (SubReg_i16_lane imm:$lane))),
+ (DSubReg_i16_reg imm:$lane)))>;
+def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
+ (v4i32 (INSERT_SUBREG QPR:$src1,
+ (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i32_reg imm:$lane))),
+ GPR:$src2, (SubReg_i32_lane imm:$lane))),
+ (DSubReg_i32_reg imm:$lane)))>;
+
+def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+
+//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
+// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
+ (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+
+def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
+def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+
+def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+
+def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+
+// VDUP : Vector Duplicate (from ARM core register to all elements)
+
+class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
+ : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
+ : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+
+def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
+def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
+def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
+ Requires<[HasNEON, HasFastVDUP32]>;
+def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
+def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
+def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
+
+// NEONvdup patterns for uarchs with fast VDUP.32.
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
+ Requires<[HasNEON,HasFastVDUP32]>;
+def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
+
+// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
+def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
+ Requires<[HasNEON,HasSlowVDUP32]>;
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
+ Requires<[HasNEON,HasSlowVDUP32]>;
+
+// VDUP : Vector Duplicate Lane (from scalar to all elements)
+
+class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType Ty, Operand IdxTy>
+ : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
+ [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
+
+class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Operand IdxTy>
+ : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
+ IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
+ [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
+ VectorIndex32:$lane)))]>;
+
+// Inst{19-16} is partially specified depending on the element size.
+
+def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
+ bits<3> lane;
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
+ bits<2> lane;
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
+ bits<1> lane;
+ let Inst{19} = lane{0};
+}
+def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
+ bits<3> lane;
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
+ bits<2> lane;
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
+ bits<1> lane;
+ let Inst{19} = lane{0};
+}
+
+def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+ (VDUPLN32d DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+ (VDUPLN32q DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
+ (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane)))>;
+def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
+ (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
+ (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
+ (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
+ (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$src, ssub_0), (i32 0)))>;
+def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
+ (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$src, ssub_0), (i32 0)))>;
+
+// VMOVN : Vector Narrowing Move
+defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
+ "vmovn", "i", trunc>;
+// VQMOVN : Vector Saturating Narrowing Move
+defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
+ "vqmovn", "s", int_arm_neon_vqmovns>;
+defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
+ "vqmovn", "u", int_arm_neon_vqmovnu>;
+defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
+ "vqmovun", "s", int_arm_neon_vqmovnsu>;
+// VMOVL : Vector Lengthening Move
+defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
+defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
+def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
+def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
+def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
+
+// Vector Conversions.
+
+// VCVT : Vector Convert Between Floating-Point and Integers
+def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v2i32, v2f32, fp_to_sint>;
+def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v2i32, v2f32, fp_to_uint>;
+def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v2f32, v2i32, sint_to_fp>;
+def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v2f32, v2i32, uint_to_fp>;
+
+def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v4i32, v4f32, fp_to_sint>;
+def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v4i32, v4f32, fp_to_uint>;
+def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v4f32, v4i32, sint_to_fp>;
+def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v4f32, v4i32, uint_to_fp>;
+
+def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+ v4i16, v4f16, fp_to_sint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+ v4i16, v4f16, fp_to_uint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+ v4f16, v4i16, sint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+ v4f16, v4i16, uint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+ v8i16, v8f16, fp_to_sint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+ v8i16, v8f16, fp_to_uint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+ v8f16, v8i16, sint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+ v8f16, v8i16, uint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+// VCVT{A, N, P, M}
+multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
+ SDPatternOperator IntU> {
+ let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+ def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
+ def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
+ def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
+ def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
+ def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s16.f16", v4i16, v4f16, IntS>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s16.f16", v8i16, v8f16, IntS>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u16.f16", v4i16, v4f16, IntU>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u16.f16", v8i16, v8f16, IntU>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ }
+}
+
+defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
+defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
+defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
+defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
+
+// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
+let DecoderMethod = "DecodeVCVTD" in {
+def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+ v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+ v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+ v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+ v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+ v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+ v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+ v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+ v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
+}
+
+let DecoderMethod = "DecodeVCVTQ" in {
+def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+ v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+ v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+ v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+ v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+ v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+ v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+ v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+ v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
+}
+
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
+ (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
+ (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
+ (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
+ (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
+ (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
+ (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
+ (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
+ (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
+ (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
+ (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
+ (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
+ (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
+ (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
+ (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
+ (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
+ (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
+
+// VCVT : Vector Convert Between Half-Precision and Single-Precision.
+def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
+ IIC_VUNAQ, "vcvt", "f16.f32",
+ v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
+ Requires<[HasNEON, HasFP16]>;
+def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
+ IIC_VUNAQ, "vcvt", "f32.f16",
+ v4f32, v4i16, int_arm_neon_vcvthf2fp>,
+ Requires<[HasNEON, HasFP16]>;
+
+// Vector Reverse.
+
+// VREV64 : Vector Reverse elements within 64-bit doublewords
+
+class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
+class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
+
+def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
+def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
+
+def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
+def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+
+// VREV32 : Vector Reverse elements within 32-bit words
+
+class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
+class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
+
+def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
+
+def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
+
+// VREV16 : Vector Reverse elements within 16-bit halfwords
+
+class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
+class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
+
+def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
+
+// Other Vector Shuffles.
+
+// Aligned extractions: really just dropping registers
+
+class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
+ : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
+ (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
+
+def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
+
+def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
+
+def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
+
+def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
+
+def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
+
+
+// VEXT : Vector Extract
+
+
+// All of these have a two-operand InstAlias.
+let TwoOperandAliasConstraint = "$Vn = $Vd" in {
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
+ : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
+ IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
+ (Ty DPR:$Vm), imm:$index)))]> {
+ bits<3> index;
+ let Inst{11} = 0b0;
+ let Inst{10-8} = index{2-0};
+}
+
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
+ : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
+ IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
+ (Ty QPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
+}
+
+def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
+ let Inst{10-8} = index{2-0};
+}
+def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
+ let Inst{10-9} = index{1-0};
+ let Inst{8} = 0b0;
+}
+def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
+ let Inst{10} = index{0};
+ let Inst{9-8} = 0b00;
+}
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
+ (v2f32 DPR:$Vm),
+ (i32 imm:$index))),
+ (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
+
+def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
+ (v4f32 QPR:$Vm),
+ (i32 imm:$index))),
+ (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
+// VTRN : Vector Transpose
+
+def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
+
+def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
+
+// VUZP : Vector Unzip (Deinterleave)
+
+def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
+// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
+ (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
+
+// VZIP : Vector Zip (Interleave)
+
+def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
+// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
+ (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
+
+// Vector Table Lookup and Table Extension.
+
+// VTBL : Vector Table Lookup
+let DecoderMethod = "DecodeTBLInstruction" in {
+def VTBL1
+ : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
+ (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBL2
+ : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
+ (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
+def VTBL3
+ : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
+ (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
+def VTBL4
+ : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
+ (ins VecListFourD:$Vn, DPR:$Vm),
+ NVTBLFrm, IIC_VTB4,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBL3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
+def VTBL4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
+
+// VTBX : Vector Table Extension
+def VTBX1
+ : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
+ DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBX2
+ : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
+def VTBX3
+ : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
+ NVTBLFrm, IIC_VTBX3,
+ "vtbx", "8", "$Vd, $Vn, $Vm",
+ "$orig = $Vd", []>;
+def VTBX4
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, $Vn, $Vm",
+ "$orig = $Vd", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBX3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX3, "$orig = $dst", []>;
+def VTBX4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX4, "$orig = $dst", []>;
+} // DecoderMethod = "DecodeTBLInstruction"
+
+// VRINT : Vector Rounding
+multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
+ let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+ def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f32",
+ v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
+ let Inst{9-7} = op9_7;
+ }
+ def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f32",
+ v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
+ let Inst{9-7} = op9_7;
+ }
+ def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f16",
+ v4f16, v4f16, Int>,
+ Requires<[HasV8, HasNEON, HasFullFP16]> {
+ let Inst{9-7} = op9_7;
+ }
+ def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f16",
+ v8f16, v8f16, Int>,
+ Requires<[HasV8, HasNEON, HasFullFP16]> {
+ let Inst{9-7} = op9_7;
+ }
+ }
+
+ def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
+ def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
+ (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
+ def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
+ (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
+ }
+}
+
+defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
+defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
+defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
+defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
+defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
+defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
+
+// Cryptography instructions
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+ DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
+ class AES<string op, bit op7, bit op6, SDPatternOperator Int>
+ : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+ !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
+ : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+ !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+ SDPatternOperator Int>
+ : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+ SDPatternOperator Int>
+ : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
+ : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
+ Requires<[HasV8, HasCrypto]>;
+}
+
+def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
+def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
+def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
+def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
+
+def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
+def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
+def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
+def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
+def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
+def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
+def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
+def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
+def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
+def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
+
+def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
+ (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
+ (SHA1H (SUBREG_TO_REG (i64 0),
+ (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
+ ssub_0)),
+ ssub_0)), GPR)>;
+
+def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
+ (SHA1C v4i32:$hash_abcd,
+ (SUBREG_TO_REG (i64 0),
+ (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
+ ssub_0),
+ v4i32:$wk)>;
+
+def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
+ (SHA1M v4i32:$hash_abcd,
+ (SUBREG_TO_REG (i64 0),
+ (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
+ ssub_0),
+ v4i32:$wk)>;
+
+def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
+ (SHA1P v4i32:$hash_abcd,
+ (SUBREG_TO_REG (i64 0),
+ (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
+ ssub_0),
+ v4i32:$wk)>;
+
+//===----------------------------------------------------------------------===//
+// NEON instructions for single-precision FP math
+//===----------------------------------------------------------------------===//
+
+class N2VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$acc, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class NVCVTIFPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode GPR:$a)),
+ (f32 (EXTRACT_SUBREG
+ (v2f32 (Inst
+ (INSERT_SUBREG
+ (v2f32 (IMPLICIT_DEF)),
+ (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
+ ssub_0))>;
+class NVCVTFIPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(i32 (OpNode SPR:$a)),
+ (i32 (EXTRACT_SUBREG
+ (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$a, ssub_0))),
+ ssub_0))>;
+
+def : N3VSPat<fadd, VADDfd>;
+def : N3VSPat<fsub, VSUBfd>;
+def : N3VSPat<fmul, VMULfd>;
+def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
+ Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
+def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
+ Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
+def : N2VSPat<fabs, VABSfd>;
+def : N2VSPat<fneg, VNEGfd>;
+def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
+def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
+def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
+def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
+def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
+def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
+
+// NEON doesn't have any f64 conversions, so provide patterns to make
+// sure the VFP conversions match when extracting from a vector.
+def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
+ (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
+ (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
+ (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
+ (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+
+
+// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
+def : Pat<(f32 (bitconvert GPR:$a)),
+ (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
+ Requires<[HasNEON, DontUseVMOVSR]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// bit_convert
+let Predicates = [IsLE] in {
+ def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+}
+def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+}
+def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+}
+def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+}
+def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+}
+
+let Predicates = [IsLE] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+}
+def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+}
+def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+}
+def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+}
+def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+}
+
+let Predicates = [IsBE] in {
+ // 64 bit conversions
+ def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
+
+ // 128 bit conversions
+ def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
+}
+
+// Fold extracting an element out of a v2i32 into a vfp register.
+def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
+ (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+
+// Vector lengthening move with load, matching extending loads.
+
+// extload, zextload and sextload for a standard lengthening load. Example:
+// Lengthen_Single<"8", "i16", "8"> =
+// Pat<(v8i16 (extloadvi8 addrmode6:$addr))
+// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0)))>;
+multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
+ let AddedComplexity = 10 in {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
+ (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
+ (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
+ (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+ }
+}
+
+// extload, zextload and sextload for a lengthening load which only uses
+// half the lanes available. Example:
+// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
+// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0))),
+// dsub_0)>;
+multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
+ string InsnLanes, string InsnTy> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)>;
+}
+
+// The following class definition is basically a copy of the
+// Lengthen_HalfSingle definition above, however with an additional parameter
+// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
+// data loaded by VLD1LN into proper vector format in big endian mode.
+multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+ string InsnLanes, string InsnTy, string RevLanes> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)>;
+}
+
+// extload, zextload and sextload for a lengthening load followed by another
+// lengthening load, to quadruple the initial length.
+//
+// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
+// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (f64 (IMPLICIT_DEF)),
+// (i32 0))),
+// dsub_0)),
+// dsub_0)>;
+multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
+}
+
+// The following class definition is basically a copy of the
+// Lengthen_Double definition above, however with an additional parameter
+// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
+// data loaded by VLD1LN into proper vector format in big endian mode.
+multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty, string RevLanes> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0))>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0))>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0))>;
+}
+
+// extload, zextload and sextload for a lengthening load followed by another
+// lengthening load, to quadruple the initial length, but which ends up only
+// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
+//
+// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
+// Pat<(v2i32 (extloadvi8 addrmode6:$addr))
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0))),
+// dsub_0)),
+// dsub_0)>;
+multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
+ dsub_0)>;
+}
+
+// The following class definition is basically a copy of the
+// Lengthen_HalfDouble definition above, however with an additional VREV16d8
+// instruction to convert data loaded by VLD1LN into proper vector format
+// in big endian mode.
+multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)),
+ dsub_0)>;
+}
+
+defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
+defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
+defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
+
+let Predicates = [IsLE] in {
+ defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
+ defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
+
+ // Double lengthening - v4i8 -> v4i16 -> v4i32
+ defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
+ // v2i8 -> v2i16 -> v2i32
+ defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
+ // v2i16 -> v2i32 -> v2i64
+ defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
+}
+
+let Predicates = [IsBE] in {
+ defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
+ defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
+
+ // Double lengthening - v4i8 -> v4i16 -> v4i32
+ defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
+ // v2i8 -> v2i16 -> v2i32
+ defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
+ // v2i16 -> v2i32 -> v2i64
+ defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
+}
+
+// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
+let Predicates = [IsLE] in {
+ def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+}
+// The following patterns are basically a copy of the patterns above,
+// however with an additional VREV16d instruction to convert data
+// loaded by VLD1LN into proper vector format in big endian mode.
+let Predicates = [IsBE] in {
+ def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+}
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
+def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
+
+// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
+ (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
+ (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
+ (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
+ (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+// ... two-operand aliases
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+// ... immediates
+def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
+ (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
+def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
+ (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
+def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
+ (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
+def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
+ (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
+
+
+// VLD1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
+def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+
+def VLD1LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VLD1LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
+def VLD1LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VLD1LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD1LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD1LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
+def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+
+def VST1LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VST1LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
+def VST1LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VST1LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST1LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST1LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
+def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
+def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+
+def VLD2LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
+def VLD2LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VLD2LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VLD2LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VLD2LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VLD2LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
+def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+
+def VST2LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
+def VST2LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VST2LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VST2LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VST2LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VST2LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD3 all-lanes pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
+def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
+def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
+def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
+def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
+def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
+
+def VLD3DUPdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
+def VLD3DUPdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
+def VLD3DUPdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
+def VLD3DUPdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VLD3 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+
+def VLD3LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VLD3LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VLD3LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VLD3LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VLD3LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VLD3LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDHWordIndexed:$list,
+ addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
+def VLD3LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQHWordIndexed:$list,
+ addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
+def VLD3LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD3 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
+def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
+def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
+def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
+def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
+def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
+
+def VLD3dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
+def VLD3dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
+def VLD3dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
+def VLD3dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VST3 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+
+def VST3LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VST3LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VST3LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VST3LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VST3LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
+def VST3LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDHWordIndexed:$list,
+ addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
+def VST3LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQHWordIndexed:$list,
+ addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
+def VST3LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST3 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
+def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
+def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
+def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
+def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
+def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
+
+def VST3dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
+def VST3dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
+def VST3dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
+def VST3dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD4 all-lanes pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
+ pred:$p)>;
+def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
+ pred:$p)>;
+def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
+ pred:$p)>;
+def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
+ pred:$p)>;
+def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
+ pred:$p)>;
+def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
+ pred:$p)>;
+
+def VLD4DUPdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
+ pred:$p)>;
+def VLD4DUPdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
+ pred:$p)>;
+def VLD4DUPdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
+ pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
+ pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
+ pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
+ pred:$p)>;
+def VLD4DUPdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list,
+ addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list,
+ addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
+
+
+// VLD4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
+def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
+
+def VLD4LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VLD4LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VLD4LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
+def VLD4LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VLD4LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
+def VLD4LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDWordIndexed:$list,
+ addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
+def VLD4LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQWordIndexed:$list,
+ addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
+
+
+
+// VLD4 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+
+def VLD4dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VLD4dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VLD4dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VLD4qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VLD4qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VLD4qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VLD4dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VST4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
+def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
+
+def VST4LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
+def VST4LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VST4LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
+def VST4LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
+def VST4LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
+def VST4LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDWordIndexed:$list,
+ addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQWordIndexed:$list,
+ addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
+
+
+// VST4 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+
+def VST4dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VST4dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VST4dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VST4qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VST4qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VST4qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
+def VST4dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VMOV/VMVN takes an optional datatype suffix
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
+ (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
+ (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
+
+// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
+ (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
+ (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
+ (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
+ (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
+ (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
+ (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
+ (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
+ (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
+ (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
+ (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
+ (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
+ (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
+ (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
+ (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
+ (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
+ (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
+ (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
+ (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
+ (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
+ (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
+ (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
+ (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
+ (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
+ (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
+ (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
+ (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
+ (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
+ (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
+ (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
+ (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
+ (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
+ (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// VSWP allows, but does not require, a type suffix.
+defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
+ (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
+ (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
+
+// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
+defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
+ (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
+ (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
+ (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
+ (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+
+// "vmov Rd, #-imm" can be handled via "vmvn".
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
+ (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
+ (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
+ (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
+ (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+
+// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
+// these should restrict to just the Q register variants, but the register
+// classes are enough to match correctly regardless, so we keep it simple
+// and just use MnemonicAlias.
+def : NEONMnemonicAlias<"vbicq", "vbic">;
+def : NEONMnemonicAlias<"vandq", "vand">;
+def : NEONMnemonicAlias<"veorq", "veor">;
+def : NEONMnemonicAlias<"vorrq", "vorr">;
+
+def : NEONMnemonicAlias<"vmovq", "vmov">;
+def : NEONMnemonicAlias<"vmvnq", "vmvn">;
+// Explicit versions for floating point so that the FPImm variants get
+// handled early. The parser gets confused otherwise.
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
+def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
+
+def : NEONMnemonicAlias<"vaddq", "vadd">;
+def : NEONMnemonicAlias<"vsubq", "vsub">;
+
+def : NEONMnemonicAlias<"vminq", "vmin">;
+def : NEONMnemonicAlias<"vmaxq", "vmax">;
+
+def : NEONMnemonicAlias<"vmulq", "vmul">;
+
+def : NEONMnemonicAlias<"vabsq", "vabs">;
+
+def : NEONMnemonicAlias<"vshlq", "vshl">;
+def : NEONMnemonicAlias<"vshrq", "vshr">;
+
+def : NEONMnemonicAlias<"vcvtq", "vcvt">;
+
+def : NEONMnemonicAlias<"vcleq", "vcle">;
+def : NEONMnemonicAlias<"vceqq", "vceq">;
+
+def : NEONMnemonicAlias<"vzipq", "vzip">;
+def : NEONMnemonicAlias<"vswpq", "vswp">;
+
+def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
+def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
+
+
+// Alias for loading floating point immediates that aren't representable
+// using the vmov.f32 encoding but the bitpattern is representable using
+// the .i32 encoding.
+def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
+ (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
+ (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
new file mode 100644
index 0000000..df6f243
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -0,0 +1,1518 @@
+//===-- ARMInstrThumb.td - Thumb support for ARM -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Thumb specific DAG Nodes.
+//
+
+def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def imm_sr_XFORM: SDNodeXForm<imm, [{
+ unsigned Imm = N->getZExtValue();
+ return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32);
+}]>;
+def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; }
+def imm_sr : Operand<i32>, PatLeaf<(imm), [{
+ uint64_t Imm = N->getZExtValue();
+ return Imm > 0 && Imm <= 32;
+}], imm_sr_XFORM> {
+ let PrintMethod = "printThumbSRImm";
+ let ParserMatchClass = ThumbSRImmAsmOperand;
+}
+
+def imm_comp_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), SDLoc(N),
+ MVT::i32);
+}]>;
+
+def imm0_7_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)-N->getZExtValue() < 8;
+}], imm_neg_XFORM>;
+
+def imm0_255_comp : PatLeaf<(i32 imm), [{
+ return ~((uint32_t)N->getZExtValue()) < 256;
+}]>;
+
+def imm8_255 : ImmLeaf<i32, [{
+ return Imm >= 8 && Imm < 256;
+}]>;
+def imm8_255_neg : PatLeaf<(i32 imm), [{
+ unsigned Val = -N->getZExtValue();
+ return Val >= 8 && Val < 256;
+}], imm_neg_XFORM>;
+
+// Break imm's up into two pieces: an immediate + a left shift. This uses
+// thumb_immshifted to match and thumb_immshifted_val and thumb_immshifted_shamt
+// to get the val/shift pieces.
+def thumb_immshifted : PatLeaf<(imm), [{
+ return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
+}]>;
+
+// Scaled 4 immediate.
+def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; }
+def t_imm0_1020s4 : Operand<i32> {
+ let PrintMethod = "printThumbS4ImmOperand";
+ let ParserMatchClass = t_imm0_1020s4_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def t_imm0_508s4_asmoperand: AsmOperandClass { let Name = "Imm0_508s4"; }
+def t_imm0_508s4 : Operand<i32> {
+ let PrintMethod = "printThumbS4ImmOperand";
+ let ParserMatchClass = t_imm0_508s4_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+// Alias use only, so no printer is necessary.
+def t_imm0_508s4_neg_asmoperand: AsmOperandClass { let Name = "Imm0_508s4Neg"; }
+def t_imm0_508s4_neg : Operand<i32> {
+ let ParserMatchClass = t_imm0_508s4_neg_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// Define Thumb specific addressing modes.
+
+// unsigned 8-bit, 2-scaled memory offset
+class OperandUnsignedOffset_b8s2 : AsmOperandClass {
+ let Name = "UnsignedOffset_b8s2";
+ let PredicateMethod = "isUnsignedOffset<8, 2>";
+}
+
+def UnsignedOffset_b8s2 : OperandUnsignedOffset_b8s2;
+
+// thumb style PC relative operand. signed, 8 bits magnitude,
+// two bits shift. can be represented as either [pc, #imm], #imm,
+// or relocatable expression...
+def ThumbMemPC : AsmOperandClass {
+ let Name = "ThumbMemPC";
+}
+
+let OperandType = "OPERAND_PCREL" in {
+def t_brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getThumbBRTargetOpValue";
+ let DecoderMethod = "DecodeThumbBROperand";
+}
+
+// ADR instruction labels.
+def t_adrlabel : Operand<i32> {
+ let EncoderMethod = "getThumbAdrLabelOpValue";
+ let PrintMethod = "printAdrLabelOperand<2>";
+ let ParserMatchClass = UnsignedOffset_b8s2;
+}
+
+def t_bcctarget : Operand<i32> {
+ let EncoderMethod = "getThumbBCCTargetOpValue";
+ let DecoderMethod = "DecodeThumbBCCTargetOperand";
+}
+
+def t_cbtarget : Operand<i32> {
+ let EncoderMethod = "getThumbCBTargetOpValue";
+ let DecoderMethod = "DecodeThumbCmpBROperand";
+}
+
+def t_bltarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLTargetOpValue";
+ let DecoderMethod = "DecodeThumbBLTargetOperand";
+}
+
+def t_blxtarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLXTargetOpValue";
+ let DecoderMethod = "DecodeThumbBLXOffset";
+}
+
+// t_addrmode_pc := <label> => pc + imm8 * 4
+//
+def t_addrmode_pc : MemOperand {
+ let EncoderMethod = "getAddrModePCOpValue";
+ let DecoderMethod = "DecodeThumbAddrModePC";
+ let PrintMethod = "printThumbLdrLabelOperand";
+ let ParserMatchClass = ThumbMemPC;
+}
+}
+
+// t_addrmode_rr := reg + reg
+//
+def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; }
+def t_addrmode_rr : MemOperand,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
+// t_addrmode_rrs := reg + reg
+//
+// We use separate scaled versions because the Select* functions need
+// to explicitly check for a matching constant and return false here so that
+// the reg+imm forms will match instead. This is a horrible way to do that,
+// as it forces tight coupling between the methods, but it's how selectiondag
+// currently works.
+def t_addrmode_rrs1 : MemOperand,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+def t_addrmode_rrs2 : MemOperand,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+def t_addrmode_rrs4 : MemOperand,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
+// t_addrmode_is4 := reg + imm5 * 4
+//
+def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; }
+def t_addrmode_is4 : MemOperand,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
+ let PrintMethod = "printThumbAddrModeImm5S4Operand";
+ let ParserMatchClass = t_addrmode_is4_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_is2 := reg + imm5 * 2
+//
+def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; }
+def t_addrmode_is2 : MemOperand,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
+ let PrintMethod = "printThumbAddrModeImm5S2Operand";
+ let ParserMatchClass = t_addrmode_is2_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_is1 := reg + imm5
+//
+def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; }
+def t_addrmode_is1 : MemOperand,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
+ let PrintMethod = "printThumbAddrModeImm5S1Operand";
+ let ParserMatchClass = t_addrmode_is1_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_sp := sp + imm8 * 4
+//
+// FIXME: This really shouldn't have an explicit SP operand at all. It should
+// be implicit, just like in the instruction encoding itself.
+def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; }
+def t_addrmode_sp : MemOperand,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+ let EncoderMethod = "getAddrModeThumbSPOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeSP";
+ let PrintMethod = "printThumbAddrModeSPOperand";
+ let ParserMatchClass = t_addrmode_sp_asm_operand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def tADJCALLSTACKUP :
+ PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
+ [(ARMcallseq_end imm:$amt1, imm:$amt2)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+def tADJCALLSTACKDOWN :
+ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
+ [(ARMcallseq_start imm:$amt)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+}
+
+class T1SystemEncoding<bits<8> opc>
+ : T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = opc;
+}
+
+def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm",
+ [(int_arm_hint imm0_15:$imm)]>,
+ T1SystemEncoding<0x00>,
+ Requires<[IsThumb, HasV6M]> {
+ bits<4> imm;
+ let Inst{7-4} = imm;
+}
+
+class tHintAlias<string Asm, dag Result> : tInstAlias<Asm, Result> {
+ let Predicates = [IsThumb, HasV6M];
+}
+
+def : tHintAlias<"nop$p", (tHINT 0, pred:$p)>; // A8.6.110
+def : tHintAlias<"yield$p", (tHINT 1, pred:$p)>; // A8.6.410
+def : tHintAlias<"wfe$p", (tHINT 2, pred:$p)>; // A8.6.408
+def : tHintAlias<"wfi$p", (tHINT 3, pred:$p)>; // A8.6.409
+def : tHintAlias<"sev$p", (tHINT 4, pred:$p)>; // A8.6.157
+def : tInstAlias<"sevl$p", (tHINT 5, pred:$p)> {
+ let Predicates = [IsThumb2, HasV8];
+}
+
+// The imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val",
+ []>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b10;
+ // A8.6.22
+ bits<8> val;
+ let Inst{7-0} = val;
+}
+// default immediate for breakpoint mnemonic
+def : InstAlias<"bkpt", (tBKPT 0)>, Requires<[IsThumb]>;
+
+def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val",
+ []>, T1Encoding<0b101110>, Requires<[IsThumb, HasV8]> {
+ let Inst{9-6} = 0b1010;
+ bits<6> val;
+ let Inst{5-0} = val;
+}
+
+def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end",
+ []>, T1Encoding<0b101101>, Requires<[IsNotMClass]>, Deprecated<HasV8Ops> {
+ bits<1> end;
+ // A8.6.156
+ let Inst{9-5} = 0b10010;
+ let Inst{4} = 1;
+ let Inst{3} = end;
+ let Inst{2-0} = 0b000;
+}
+
+// Change Processor State is a system instruction -- for disassembly only.
+def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
+ NoItinerary, "cps$imod $iflags", []>,
+ T1Misc<0b0110011> {
+ // A8.6.38 & B6.1.1
+ bit imod;
+ bits<3> iflags;
+
+ let Inst{4} = imod;
+ let Inst{3} = 0;
+ let Inst{2-0} = iflags;
+ let DecoderMethod = "DecodeThumbCPS";
+}
+
+// For both thumb1 and thumb2.
+let isNotDuplicable = 1, isCodeGenOnly = 1 in
+def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
+ [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
+ // A8.6.6
+ bits<3> dst;
+ let Inst{6-3} = 0b1111; // Rm = pc
+ let Inst{2-0} = dst;
+}
+
+// ADD <Rd>, sp, #<imm8>
+// FIXME: This should not be marked as having side effects, and it should be
+// rematerializable. Clearing the side effect bit causes miscompilations,
+// probably because the instruction can be moved around.
+def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
+ IIC_iALUi, "add", "\t$dst, $sp, $imm", []>,
+ T1Encoding<{1,0,1,0,1,?}>, Sched<[WriteALU]> {
+ // A6.2 & A8.6.8
+ bits<3> dst;
+ bits<8> imm;
+ let Inst{10-8} = dst;
+ let Inst{7-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSpecialReg";
+}
+
+// Thumb1 frame lowering is rather fragile, we hope to be able to use
+// tADDrSPi, but we may need to insert a sequence that clobbers CPSR.
+def tADDframe : PseudoInst<(outs tGPR:$dst), (ins i32imm:$base, i32imm:$offset),
+ NoItinerary, []>,
+ Requires<[IsThumb, IsThumb1Only]> {
+ let Defs = [CPSR];
+}
+
+// ADD sp, sp, #<imm7>
+def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
+ IIC_iALUi, "add", "\t$Rdn, $imm", []>,
+ T1Misc<{0,0,0,0,0,?,?}>, Sched<[WriteALU]> {
+ // A6.2.5 & A8.6.8
+ bits<7> imm;
+ let Inst{6-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSPImm";
+}
+
+// SUB sp, sp, #<imm7>
+// FIXME: The encoding and the ASM string don't match up.
+def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
+ IIC_iALUi, "sub", "\t$Rdn, $imm", []>,
+ T1Misc<{0,0,0,0,1,?,?}>, Sched<[WriteALU]> {
+ // A6.2.5 & A8.6.214
+ bits<7> imm;
+ let Inst{6-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSPImm";
+}
+
+def : tInstAlias<"add${p} sp, $imm",
+ (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
+def : tInstAlias<"add${p} sp, sp, $imm",
+ (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
+
+// Can optionally specify SP as a three operand instruction.
+def : tInstAlias<"add${p} sp, sp, $imm",
+ (tADDspi SP, t_imm0_508s4:$imm, pred:$p)>;
+def : tInstAlias<"sub${p} sp, sp, $imm",
+ (tSUBspi SP, t_imm0_508s4:$imm, pred:$p)>;
+
+// ADD <Rm>, sp
+def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr,
+ "add", "\t$Rdn, $sp, $Rn", []>,
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
+ // A8.6.9 Encoding T1
+ bits<4> Rdn;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = 0b1101;
+ let Inst{2-0} = Rdn{2-0};
+ let DecoderMethod = "DecodeThumbAddSPReg";
+}
+
+// ADD sp, <Rm>
+def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
+ // A8.6.9 Encoding T2
+ bits<4> Rm;
+ let Inst{7} = 1;
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b101;
+ let DecoderMethod = "DecodeThumbAddSPReg";
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>,
+ T1Special<{1,1,0,?}>, Sched<[WriteBr]> {
+ // A6.2.3 & A8.6.25
+ bits<4> Rm;
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b000;
+ let Unpredictable{2-0} = 0b111;
+ }
+}
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br,
+ [(ARMretflag)], (tBX LR, pred:$p)>, Sched<[WriteBr]>;
+
+ // Alternative return instruction used by vararg functions.
+ def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p),
+ 2, IIC_Br, [],
+ (tBX GPR:$Rm, pred:$p)>, Sched<[WriteBr]>;
+}
+
+// All calls clobber the non-callee saved registers. SP is marked as a use to
+// prevent stack-pointer assignments that appear immediately before calls from
+// potentially appearing dead.
+let isCall = 1,
+ Defs = [LR], Uses = [SP] in {
+ // Also used for Thumb2
+ def tBL : TIx2<0b11110, 0b11, 1,
+ (outs), (ins pred:$p, t_bltarget:$func), IIC_Br,
+ "bl${p}\t$func",
+ [(ARMtcall tglobaladdr:$func)]>,
+ Requires<[IsThumb]>, Sched<[WriteBrL]> {
+ bits<24> func;
+ let Inst{26} = func{23};
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = func{22};
+ let Inst{11} = func{21};
+ let Inst{10-0} = func{10-0};
+ }
+
+ // ARMv5T and above, also used for Thumb2
+ def tBLXi : TIx2<0b11110, 0b11, 0,
+ (outs), (ins pred:$p, t_blxtarget:$func), IIC_Br,
+ "blx${p}\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, HasV5T, IsNotMClass]>, Sched<[WriteBrL]> {
+ bits<24> func;
+ let Inst{26} = func{23};
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = func{22};
+ let Inst{11} = func{21};
+ let Inst{10-1} = func{10-1};
+ let Inst{0} = 0; // func{0} is assumed zero
+ }
+
+ // Also used for Thumb2
+ def tBLXr : TI<(outs), (ins pred:$p, GPR:$func), IIC_Br,
+ "blx${p}\t$func",
+ [(ARMtcall GPR:$func)]>,
+ Requires<[IsThumb, HasV5T]>,
+ T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24;
+ bits<4> func;
+ let Inst{6-3} = func;
+ let Inst{2-0} = 0b000;
+ }
+
+ // ARMv4T
+ def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func),
+ 4, IIC_Br,
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsThumb, IsThumb1Only]>, Sched<[WriteBr]>;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ let isPredicable = 1 in
+ def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br,
+ "b", "\t$target", [(br bb:$target)]>,
+ T1Encoding<{1,1,1,0,0,?}>, Sched<[WriteBr]> {
+ bits<11> target;
+ let Inst{10-0} = target;
+ let AsmMatchConverter = "cvtThumbBranches";
+ }
+
+ // Far jump
+ // Just a pseudo for a tBL instruction. Needed to let regalloc know about
+ // the clobber of LR.
+ let Defs = [LR] in
+ def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p),
+ 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>,
+ Sched<[WriteBrTbl]>;
+
+ def tBR_JTr : tPseudoInst<(outs),
+ (ins tGPR:$target, i32imm:$jt),
+ 0, IIC_Br,
+ [(ARMbrjt tGPR:$target, tjumptable:$jt)]>,
+ Sched<[WriteBrTbl]> {
+ let Size = 2;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ }
+}
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1 in
+ def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br,
+ "b${p}\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]>,
+ T1BranchCond<{1,1,0,1}>, Sched<[WriteBr]> {
+ bits<4> p;
+ bits<8> target;
+ let Inst{11-8} = p;
+ let Inst{7-0} = target;
+ let AsmMatchConverter = "cvtThumbBranches";
+}
+
+
+// Tail calls
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // IOS versions.
+ let Uses = [SP] in {
+ def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst),
+ 4, IIC_Br, [],
+ (tBX GPR:$dst, (ops 14, zero_reg))>,
+ Requires<[IsThumb]>, Sched<[WriteBr]>;
+ }
+ // tTAILJMPd: MachO version uses a Thumb2 branch (no Thumb1 tail calls
+ // on MachO), so it's in ARMInstrThumb2.td.
+ // Non-MachO version:
+ let Uses = [SP] in {
+ def tTAILJMPdND : tPseudoExpand<(outs),
+ (ins t_brtarget:$dst, pred:$p),
+ 4, IIC_Br, [],
+ (tB t_brtarget:$dst, pred:$p)>,
+ Requires<[IsThumb, IsNotMachO]>, Sched<[WriteBr]>;
+ }
+}
+
+
+// A8.6.218 Supervisor Call (Software Interrupt)
+// A8.6.16 B: Encoding T1
+// If Inst{11-8} == 0b1111 then SEE SVC
+let isCall = 1, Uses = [SP] in
+def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br,
+ "svc", "\t$imm", []>, Encoding16, Sched<[WriteBr]> {
+ bits<8> imm;
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = imm;
+}
+
+// The assembler uses 0xDEFE for a trap instruction.
+let isBarrier = 1, isTerminator = 1 in
+def tTRAP : TI<(outs), (ins), IIC_Br,
+ "trap", [(trap)]>, Encoding16, Sched<[WriteBr]> {
+ let Inst = 0xdefe;
+}
+
+//===----------------------------------------------------------------------===//
+// Load Store Instructions.
+//
+
+// PC-relative loads need to be matched first as constant pool accesses need to
+// always be PC-relative. We do this using AddedComplexity, as the pattern is
+// simpler than the patterns of the other load instructions.
+let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in
+def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// SP-relative loads should be matched before standard immediate-offset loads as
+// it means we avoid having to move SP to another register.
+let canFoldAsLoad = 1 in
+def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
+ T1LdStSP<{1,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// Loads: reg/reg and reg/imm5
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ // Immediate-offset loads should be matched before register-offset loads as
+ // when the offset is a constant it's simpler to first check if it fits in the
+ // immediate offset field then fall back to register-offset if it doesn't.
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 1 /* Load */,
+ (outs tGPR:$Rt), (ins AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_i:$addr))]>;
+ // Register-offset loads are matched last.
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
+}
+// Stores: reg/reg and reg/imm5
+multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 0 /* Store */,
+ (outs), (ins tGPR:$Rt, AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
+}
+
+// A8.6.57 & A8.6.60
+defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iLoad_r, IIC_iLoad_i, "ldr",
+ UnOpFrag<(load node:$Src)>>;
+
+// A8.6.64 & A8.6.61
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// A8.6.76 & A8.6.73
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
+ UnOpFrag<(zextloadi16 node:$Src)>>;
+
+let AddedComplexity = 10 in
+def tLDRSB : // A8.6.80
+ T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
+ AddrModeT1_1, IIC_iLoad_bh_r,
+ "ldrsb", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr:$addr))]>;
+
+let AddedComplexity = 10 in
+def tLDRSH : // A8.6.84
+ T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
+ AddrModeT1_2, IIC_iLoad_bh_r,
+ "ldrsh", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>;
+
+
+def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
+ "str", "\t$Rt, $addr",
+ [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
+ T1LdStSP<{0,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// A8.6.194 & A8.6.192
+defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iStore_r, IIC_iStore_i, "str",
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+
+// A8.6.197 & A8.6.195
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// A8.6.207 & A8.6.205
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+// These require base address to be written back or one of the loaded regs.
+let hasSideEffects = 0 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IIC_iLoad_m, "ldm${p}\t$Rn, $regs", []>, T1Encoding<{1,1,0,0,1,?}> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+}
+
+// Writeback version is just a pseudo, as there's no encoding difference.
+// Writeback happens iff the base register is not in the destination register
+// list.
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+def tLDMIA_UPD :
+ InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
+ "$Rn = $wb", IIC_iLoad_mu>,
+ PseudoInstExpansion<(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)> {
+ let Size = 2;
+ let OutOperandList = (outs GPR:$wb);
+ let InOperandList = (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops);
+ let Pattern = [];
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// There is no non-writeback version of STM for Thumb.
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+def tSTMIA_UPD : Thumb1I<(outs GPR:$wb),
+ (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ AddrModeNone, 2, IIC_iStore_mu,
+ "stm${p}\t$Rn!, $regs", "$Rn = $wb", []>,
+ T1Encoding<{1,1,0,0,0,?}> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+}
+
+} // hasSideEffects
+
+def : InstAlias<"ldm${p} $Rn!, $regs",
+ (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
+def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iPop,
+ "pop${p}\t$regs", []>,
+ T1Misc<{1,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{15};
+ let Inst{7-0} = regs{7-0};
+}
+
+let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
+def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iStore_m,
+ "push${p}\t$regs", []>,
+ T1Misc<{0,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{14};
+ let Inst{7-0} = regs{7-0};
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+// Helper classes for encoding T1pI patterns:
+class T1pIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rn;
+}
+class T1pIMiscEncode<bits<7> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1Misc<opA> {
+ bits<3> Rm;
+ bits<3> Rd;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sI patterns:
+class T1sIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rd;
+ bits<3> Rn;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncode<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ bits<3> Rd;
+ let Inst{8-6} = Rm;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sIt patterns:
+class T1sItDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rdn;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rdn;
+}
+class T1sItGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rdn;
+ bits<8> imm8;
+ let Inst{10-8} = Rdn;
+ let Inst{7-0} = imm8;
+}
+
+// Add with carry register
+let isCommutable = 1, Uses = [CPSR] in
+def tADC : // A8.6.2
+ T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
+ "adc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+
+// Add immediate
+def tADDi3 : // A8.6.4 T1
+ T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ IIC_iALUi,
+ "add", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]>,
+ Sched<[WriteALU]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
+
+def tADDi8 : // A8.6.4 T2
+ T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn),
+ (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
+ "add", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>,
+ Sched<[WriteALU]>;
+
+// Add register
+let isCommutable = 1 in
+def tADDrr : // A8.6.6 T1
+ T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "add", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+
+let hasSideEffects = 0 in
+def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
+ // A8.6.6 T2
+ bits<4> Rdn;
+ bits<4> Rm;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rdn{2-0};
+}
+
+// AND register
+let isCommutable = 1 in
+def tAND : // A8.6.12
+ T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "and", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+
+// ASR immediate
+def tASRri : // A8.6.14
+ T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
+ IIC_iMOVsi,
+ "asr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]>,
+ Sched<[WriteALU]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// ASR register
+def tASRrr : // A8.6.15
+ T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "asr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+
+// BIC register
+def tBIC : // A8.6.20
+ T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "bic", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>,
+ Sched<[WriteALU]>;
+
+// CMN register
+let isCompare = 1, Defs = [CPSR] in {
+//FIXME: Disable CMN, as CCodes are backwards from compare expectations
+// Compare-to-zero still works out, just not the relationals
+//def tCMN : // A8.6.33
+// T1pIDPEncode<0b1011, (outs), (ins tGPR:$lhs, tGPR:$rhs),
+// IIC_iCMPr,
+// "cmn", "\t$lhs, $rhs",
+// [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
+
+def tCMNz : // A8.6.33
+ T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmn", "\t$Rn, $Rm",
+ [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>, Sched<[WriteCMP]>;
+
+} // isCompare = 1, Defs = [CPSR]
+
+// CMP immediate
+let isCompare = 1, Defs = [CPSR] in {
+def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi,
+ "cmp", "\t$Rn, $imm8",
+ [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
+ T1General<{1,0,1,?,?}>, Sched<[WriteCMP]> {
+ // A8.6.35
+ bits<3> Rn;
+ bits<8> imm8;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = imm8;
+}
+
+// CMP register
+def tCMPr : // A8.6.36 T1
+ T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm",
+ [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>, Sched<[WriteCMP]>;
+
+def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm", []>,
+ T1Special<{0,1,?,?}>, Sched<[WriteCMP]> {
+ // A8.6.36 T2
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{7} = Rn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rn{2-0};
+}
+} // isCompare = 1, Defs = [CPSR]
+
+
+// XOR register
+let isCommutable = 1 in
+def tEOR : // A8.6.45
+ T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "eor", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+
+// LSL immediate
+def tLSLri : // A8.6.88
+ T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_31:$imm5),
+ IIC_iMOVsi,
+ "lsl", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]>,
+ Sched<[WriteALU]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// LSL register
+def tLSLrr : // A8.6.89
+ T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsl", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+
+// LSR immediate
+def tLSRri : // A8.6.90
+ T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
+ IIC_iMOVsi,
+ "lsr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]>,
+ Sched<[WriteALU]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// LSR register
+def tLSRrr : // A8.6.91
+ T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+
+// Move register
+let isMoveImm = 1 in
+def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
+ "mov", "\t$Rd, $imm8",
+ [(set tGPR:$Rd, imm0_255:$imm8)]>,
+ T1General<{1,0,0,?,?}>, Sched<[WriteALU]> {
+ // A8.6.96
+ bits<3> Rd;
+ bits<8> imm8;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = imm8;
+}
+// Because we have an explicit tMOVSr below, we need an alias to handle
+// the immediate "movs" form here. Blech.
+def : tInstAlias <"movs $Rdn, $imm",
+ (tMOVi8 tGPR:$Rdn, CPSR, imm0_255:$imm, 14, 0)>;
+
+// A7-73: MOV(2) - mov setting flag.
+
+let hasSideEffects = 0 in {
+def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
+ 2, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", "", []>,
+ T1Special<{1,0,?,?}>, Sched<[WriteALU]> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{7} = Rd{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rd{2-0};
+}
+let Defs = [CPSR] in
+def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+ "movs\t$Rd, $Rm", []>, Encoding16, Sched<[WriteALU]> {
+ // A8.6.97
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{15-6} = 0b0000000000;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+} // hasSideEffects
+
+// Multiply register
+let isCommutable = 1 in
+def tMUL : // A8.6.105 T1
+ Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2,
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd",
+ [(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>,
+ T1DataProcessing<0b1101> {
+ bits<3> Rd;
+ bits<3> Rn;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+ let AsmMatchConverter = "cvtThumbMultiply";
+}
+
+def :tInstAlias<"mul${s}${p} $Rdm, $Rn", (tMUL tGPR:$Rdm, s_cc_out:$s, tGPR:$Rn,
+ pred:$p)>;
+
+// Move inverse register
+def tMVN : // A8.6.107
+ T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr,
+ "mvn", "\t$Rd, $Rn",
+ [(set tGPR:$Rd, (not tGPR:$Rn))]>, Sched<[WriteALU]>;
+
+// Bitwise or register
+let isCommutable = 1 in
+def tORR : // A8.6.114
+ T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "orr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+
+// Swaps
+def tREV : // A8.6.134
+ T1pIMiscEncode<{1,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (bswap tGPR:$Rm))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>;
+
+def tREV16 : // A8.6.135
+ T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev16", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>;
+
+def tREVSH : // A8.6.136
+ T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "revsh", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>;
+
+// Rotate right register
+def tROR : // A8.6.139
+ T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "ror", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>,
+ Sched<[WriteALU]>;
+
+// Negate register
+def tRSB : // A8.6.141
+ T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn),
+ IIC_iALUi,
+ "rsb", "\t$Rd, $Rn, #0",
+ [(set tGPR:$Rd, (ineg tGPR:$Rn))]>, Sched<[WriteALU]>;
+
+// Subtract with carry register
+let Uses = [CPSR] in
+def tSBC : // A8.6.151
+ T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sbc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>,
+ Sched<[WriteALU]>;
+
+// Subtract immediate
+def tSUBi3 : // A8.6.210 T1
+ T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ IIC_iALUi,
+ "sub", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]>,
+ Sched<[WriteALU]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
+
+def tSUBi8 : // A8.6.210 T2
+ T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn),
+ (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
+ "sub", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>,
+ Sched<[WriteALU]>;
+
+// Subtract register
+def tSUBrr : // A8.6.212
+ T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sub", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>,
+ Sched<[WriteALU]>;
+
+// Sign-extend byte
+def tSXTB : // A8.6.222
+ T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>,
+ Sched<[WriteALU]>;
+
+// Sign-extend short
+def tSXTH : // A8.6.224
+ T1pIMiscEncode<{0,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>,
+ Sched<[WriteALU]>;
+
+// Test
+let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
+def tTST : // A8.6.230
+ T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr,
+ "tst", "\t$Rn, $Rm",
+ [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>,
+ Sched<[WriteALU]>;
+
+// A8.8.247 UDF - Undefined (Encoding T1)
+def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8",
+ [(int_arm_undefined imm0_255:$imm8)]>, Encoding16 {
+ bits<8> imm8;
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1110;
+ let Inst{7-0} = imm8;
+}
+
+// Zero-extend byte
+def tUXTB : // A8.6.262
+ T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>,
+ Sched<[WriteALU]>;
+
+// Zero-extend short
+def tUXTH : // A8.6.264
+ T1pIMiscEncode<{0,0,1,0,1,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>;
+
+// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation.
+// Expanded after instruction selection into a branch sequence.
+let usesCustomInserter = 1 in // Expanded after instruction selection.
+ def tMOVCCr_pseudo :
+ PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, cmovpred:$p),
+ NoItinerary,
+ [(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, cmovpred:$p))]>;
+
+// tLEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+
+def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}\t$Rd, $addr", []>,
+ T1Encoding<{1,0,1,0,0,?}>, Sched<[WriteALU]> {
+ bits<3> Rd;
+ bits<8> addr;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = addr;
+ let DecoderMethod = "DecodeThumbAddSpecialReg";
+}
+
+let hasSideEffects = 0, isReMaterializable = 1 in
+def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
+ 2, IIC_iALUi, []>, Sched<[WriteALU]>;
+
+let hasSideEffects = 1 in
+def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
+ (ins i32imm:$label, pred:$p),
+ 2, IIC_iALUi, []>, Sched<[WriteALU]>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
+def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
+ [(set R0, ARMthread_pointer)]>,
+ Sched<[WriteBr]>;
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+//
+
+// eh_sjlj_setjmp() is an instruction sequence to store the return address and
+// save #0 in R0 for the non-longjmp case. Since by its nature we may be coming
+// from some other function to get here, and we're using the stack frame for the
+// containing function to save/restore registers, we can't keep anything live in
+// regs across the eh_sjlj_setjmp(), else it will almost certainly have been
+// tromped upon when we get here from a longjmp(). We force everything out of
+// registers except for our own input by listing the relevant registers in
+// Defs. By doing so, we also cause the prologue/epilogue code to actively
+// preserve all of the callee-saved resgisters, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in
+def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "","",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
+
+// FIXME: Non-IOS version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
+ Defs = [ R7, LR, SP ] in
+def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+ AddrModeNone, 0, IndexModeNone,
+ Pseudo, NoItinerary, "", "",
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsThumb]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Comparisons
+def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
+ (tCMPi8 tGPR:$Rn, imm0_255:$imm8)>;
+def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
+ (tCMPr tGPR:$Rn, tGPR:$Rm)>;
+
+// Add with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
+ (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
+ (tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
+ (tADDrr tGPR:$lhs, tGPR:$rhs)>;
+
+// Subtract with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
+ (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
+ (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
+def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
+ (tSUBrr tGPR:$lhs, tGPR:$rhs)>;
+
+// Bswap 16 with load/store
+def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
+ (tREV16 (tLDRHi t_addrmode_is2:$addr))>;
+def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rr:$addr)), (i32 16)),
+ (tREV16 (tLDRHr t_addrmode_rr:$addr))>;
+def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
+ t_addrmode_is2:$addr),
+ (tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>;
+def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
+ t_addrmode_rr:$addr),
+ (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rr:$addr)>;
+
+// ConstantPool
+def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
+
+// GlobalAddress
+def tLDRLIT_ga_pcrel : PseudoInst<(outs tGPR:$dst), (ins i32imm:$addr),
+ IIC_iLoadiALU,
+ [(set tGPR:$dst,
+ (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsThumb, DontUseMovt]>;
+
+def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src),
+ IIC_iLoad_i,
+ [(set tGPR:$dst,
+ (ARMWrapper tglobaladdr:$src))]>,
+ Requires<[IsThumb, DontUseMovt]>;
+
+
+// JumpTable
+def : T1Pat<(ARMWrapperJT tjumptable:$dst),
+ (tLEApcrelJT tjumptable:$dst)>;
+
+// Direct calls
+def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
+ Requires<[IsThumb]>;
+
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
+ Requires<[IsThumb, HasV5T, IsNotMClass]>;
+
+// Indirect calls to ARM routines
+def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
+ Requires<[IsThumb, HasV5T]>;
+
+// zextload i1 -> zextload i8
+def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
+ (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_rr:$addr),
+ (tLDRBr t_addrmode_rr:$addr)>;
+
+// extload from the stack -> word load from the stack, as it avoids having to
+// materialize the base in a separate register. This only works when a word
+// load puts the byte/halfword value in the same place in the register that the
+// byte/halfword load would, i.e. when little-endian.
+def : T1Pat<(extloadi1 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
+ Requires<[IsThumb, IsThumb1Only, IsLE]>;
+def : T1Pat<(extloadi8 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
+ Requires<[IsThumb, IsThumb1Only, IsLE]>;
+def : T1Pat<(extloadi16 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
+ Requires<[IsThumb, IsThumb1Only, IsLE]>;
+
+// extload -> zextload
+def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
+
+// If it's impossible to use [r,r] address mode for sextload, select to
+// ldr{b|h} + sxt{b|h} instead.
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tSXTB (tLDRBi t_addrmode_is1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi8 t_addrmode_rr:$addr),
+ (tSXTB (tLDRBr t_addrmode_rr:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tSXTH (tLDRHi t_addrmode_is2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_rr:$addr),
+ (tSXTH (tLDRHr t_addrmode_rr:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi8 t_addrmode_rr:$addr),
+ (tASRri (tLSLri (tLDRBr t_addrmode_rr:$addr), 24), 24)>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi16 t_addrmode_rr:$addr),
+ (tASRri (tLSLri (tLDRHr t_addrmode_rr:$addr), 16), 16)>;
+
+def : T1Pat<(atomic_load_8 t_addrmode_is1:$src),
+ (tLDRBi t_addrmode_is1:$src)>;
+def : T1Pat<(atomic_load_8 t_addrmode_rr:$src),
+ (tLDRBr t_addrmode_rr:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_is2:$src),
+ (tLDRHi t_addrmode_is2:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_rr:$src),
+ (tLDRHr t_addrmode_rr:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_is4:$src),
+ (tLDRi t_addrmode_is4:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_rr:$src),
+ (tLDRr t_addrmode_rr:$src)>;
+def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val),
+ (tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>;
+def : T1Pat<(atomic_store_8 t_addrmode_rr:$ptr, tGPR:$val),
+ (tSTRBr tGPR:$val, t_addrmode_rr:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val),
+ (tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_rr:$ptr, tGPR:$val),
+ (tSTRHr tGPR:$val, t_addrmode_rr:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val),
+ (tSTRi tGPR:$val, t_addrmode_is4:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_rr:$ptr, tGPR:$val),
+ (tSTRr tGPR:$val, t_addrmode_rr:$ptr)>;
+
+// Large immediate handling.
+
+// Two piece imms.
+def : T1Pat<(i32 thumb_immshifted:$src),
+ (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
+ (thumb_immshifted_shamt imm:$src))>;
+
+def : T1Pat<(i32 imm0_255_comp:$src),
+ (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let isReMaterializable = 1 in
+def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ NoItinerary,
+ [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+// Pseudo-instruction for merged POP and return.
+// FIXME: remove when we have a way to marking a MI with these properties.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1 in
+def tPOP_RET : tPseudoExpand<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ 2, IIC_iPop_Br, [],
+ (tPOP pred:$p, reglist:$regs)>, Sched<[WriteBrL]>;
+
+// Indirect branch using "mov pc, $Rm"
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBRIND : tPseudoExpand<(outs), (ins GPR:$Rm, pred:$p),
+ 2, IIC_Br, [(brind GPR:$Rm)],
+ (tMOVr PC, GPR:$Rm, pred:$p)>, Sched<[WriteBr]>;
+}
+
+
+// In Thumb1, "nop" is encoded as a "mov r8, r8". Technically, the bf00
+// encoding is available on ARMv6K, but we don't differentiate that finely.
+def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
+
+
+// For round-trip assembly/disassembly, we have to handle a CPS instruction
+// without any iflags. That's not, strictly speaking, valid syntax, but it's
+// a useful extension and assembles to defined behaviour (the insn does
+// nothing).
+def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
+
+
+// Implied destination operand forms for shifts.
+def : tInstAlias<"lsl${s}${p} $Rdm, $imm",
+ (tLSLri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm0_31:$imm, pred:$p)>;
+def : tInstAlias<"lsr${s}${p} $Rdm, $imm",
+ (tLSRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>;
+def : tInstAlias<"asr${s}${p} $Rdm, $imm",
+ (tASRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
new file mode 100644
index 0000000..d460d33
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -0,0 +1,4765 @@
+//===-- ARMInstrThumb2.td - Thumb2 support for ARM ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb2 instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+// IT block predicate field
+def it_pred_asmoperand : AsmOperandClass {
+ let Name = "ITCondCode";
+ let ParserMethod = "parseITCondCode";
+}
+def it_pred : Operand<i32> {
+ let PrintMethod = "printMandatoryPredicateOperand";
+ let ParserMatchClass = it_pred_asmoperand;
+}
+
+// IT block condition mask
+def it_mask_asmoperand : AsmOperandClass { let Name = "ITMask"; }
+def it_mask : Operand<i32> {
+ let PrintMethod = "printThumbITMask";
+ let ParserMatchClass = it_mask_asmoperand;
+}
+
+// t2_shift_imm: An integer that encodes a shift amount and the type of shift
+// (asr or lsl). The 6-bit immediate encodes as:
+// {5} 0 ==> lsl
+// 1 asr
+// {4-0} imm5 shift amount.
+// asr #32 not allowed
+def t2_shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+ let ParserMatchClass = ShifterImmAsmOperand;
+ let DecoderMethod = "DecodeT2ShifterImmOperand";
+}
+
+// Shifted operands. No register controlled shifts for Thumb2.
+// Note: We do not support rrx shifted operands yet.
+def t2_so_reg : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectShiftImmShifterOperand",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getT2SORegOpValue";
+ let PrintMethod = "printT2SOOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
+ let MIOperandInfo = (ops rGPR, i32imm);
+}
+
+// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
+def t2_so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), SDLoc(N),
+ MVT::i32);
+}]>;
+
+// t2_so_imm_neg_XFORM - Return the negation of a t2_so_imm value
+def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-((int)N->getZExtValue()), SDLoc(N),
+ MVT::i32);
+}]>;
+
+// so_imm_notSext_XFORM - Return a so_imm value packed into the format
+// described for so_imm_notSext def below, with sign extension from 16
+// bits.
+def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{
+ APInt apIntN = N->getAPIntValue();
+ unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue();
+ return CurDAG->getTargetConstant(~N16bitSignExt, SDLoc(N), MVT::i32);
+}]>;
+
+// t2_so_imm - Match a 32-bit immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
+// immediate splatted into multiple bytes of the word.
+def t2_so_imm_asmoperand : ImmAsmOperand { let Name = "T2SOImm"; }
+def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }]> {
+ let ParserMatchClass = t2_so_imm_asmoperand;
+ let EncoderMethod = "getT2SOImmOpValue";
+ let DecoderMethod = "DecodeT2SOImm";
+}
+
+// t2_so_imm_not - Match an immediate that is a complement
+// of a t2_so_imm.
+// Note: this pattern doesn't require an encoder method and such, as it's
+// only used on aliases (Pat<> and InstAlias<>). The actual encoding
+// is handled by the destination instructions, which use t2_so_imm.
+def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; }
+def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{
+ return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
+}], t2_so_imm_not_XFORM> {
+ let ParserMatchClass = t2_so_imm_not_asmoperand;
+}
+
+// t2_so_imm_notSext - match an immediate that is a complement of a t2_so_imm
+// if the upper 16 bits are zero.
+def t2_so_imm_notSext : Operand<i32>, PatLeaf<(imm), [{
+ APInt apIntN = N->getAPIntValue();
+ if (!apIntN.isIntN(16)) return false;
+ unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue();
+ return ARM_AM::getT2SOImmVal(~N16bitSignExt) != -1;
+ }], t2_so_imm_notSext16_XFORM> {
+ let ParserMatchClass = t2_so_imm_not_asmoperand;
+}
+
+// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
+def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
+def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ int64_t Value = -(int)N->getZExtValue();
+ return Value && ARM_AM::getT2SOImmVal(Value) != -1;
+}], t2_so_imm_neg_XFORM> {
+ let ParserMatchClass = t2_so_imm_neg_asmoperand;
+}
+
+/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
+def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; }
+def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 4096;
+}]> {
+ let ParserMatchClass = imm0_4095_asmoperand;
+}
+
+def imm0_4095_neg_asmoperand: AsmOperandClass { let Name = "Imm0_4095Neg"; }
+def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{
+ return (uint32_t)(-N->getZExtValue()) < 4096;
+}], imm_neg_XFORM> {
+ let ParserMatchClass = imm0_4095_neg_asmoperand;
+}
+
+def imm1_255_neg : PatLeaf<(i32 imm), [{
+ uint32_t Val = -N->getZExtValue();
+ return (Val > 0 && Val < 255);
+}], imm_neg_XFORM>;
+
+def imm0_255_not : PatLeaf<(i32 imm), [{
+ return (uint32_t)(~N->getZExtValue()) < 255;
+}], imm_comp_XFORM>;
+
+def lo5AllOne : PatLeaf<(i32 imm), [{
+ // Returns true if all low 5-bits are 1.
+ return (((uint32_t)N->getZExtValue()) & 0x1FUL) == 0x1FUL;
+}]>;
+
+// Define Thumb2 specific addressing modes.
+
+// t2addrmode_imm12 := reg + imm12
+def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
+def t2addrmode_imm12 : MemOperand,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
+ let PrintMethod = "printAddrModeImm12Operand<false>";
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm12";
+ let ParserMatchClass = t2addrmode_imm12_asmoperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2ldrlabel := imm12
+def t2ldrlabel : Operand<i32> {
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let PrintMethod = "printThumbLdrLabelOperand";
+}
+
+def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";}
+def t2ldr_pcrel_imm12 : Operand<i32> {
+ let ParserMatchClass = t2ldr_pcrel_imm12_asmoperand;
+ // used for assembler pseudo instruction and maps to t2ldrlabel, so
+ // doesn't need encoder or print methods of its own.
+}
+
+// ADR instruction labels.
+def t2adrlabel : Operand<i32> {
+ let EncoderMethod = "getT2AdrLabelOpValue";
+ let PrintMethod = "printAdrLabelOperand<0>";
+}
+
+// t2addrmode_posimm8 := reg + imm8
+def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";}
+def t2addrmode_posimm8 : MemOperand {
+ let PrintMethod = "printT2AddrModeImm8Operand<false>";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemPosImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_negimm8 := reg - imm8
+def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";}
+def t2addrmode_negimm8 : MemOperand,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
+ let PrintMethod = "printT2AddrModeImm8Operand<false>";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemNegImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_imm8 := reg +/- imm8
+def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; }
+class T2AddrMode_Imm8 : MemOperand,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+def t2addrmode_imm8 : T2AddrMode_Imm8 {
+ let PrintMethod = "printT2AddrModeImm8Operand<false>";
+}
+
+def t2addrmode_imm8_pre : T2AddrMode_Imm8 {
+ let PrintMethod = "printT2AddrModeImm8Operand<true>";
+}
+
+def t2am_imm8_offset : MemOperand,
+ ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
+ [], [SDNPWantRoot]> {
+ let PrintMethod = "printT2AddrModeImm8OffsetOperand";
+ let EncoderMethod = "getT2AddrModeImm8OffsetOpValue";
+ let DecoderMethod = "DecodeT2Imm8";
+}
+
+// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
+def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
+class T2AddrMode_Imm8s4 : MemOperand {
+ let EncoderMethod = "getT2AddrModeImm8s4OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8s4";
+ let ParserMatchClass = MemImm8s4OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+def t2addrmode_imm8s4 : T2AddrMode_Imm8s4 {
+ let PrintMethod = "printT2AddrModeImm8s4Operand<false>";
+}
+
+def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 {
+ let PrintMethod = "printT2AddrModeImm8s4Operand<true>";
+}
+
+def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; }
+def t2am_imm8s4_offset : MemOperand {
+ let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
+ let EncoderMethod = "getT2Imm8s4OpValue";
+ let DecoderMethod = "DecodeT2Imm8S4";
+}
+
+// t2addrmode_imm0_1020s4 := reg + (imm8 << 2)
+def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
+ let Name = "MemImm0_1020s4Offset";
+}
+def t2addrmode_imm0_1020s4 : MemOperand,
+ ComplexPattern<i32, 2, "SelectT2AddrModeExclusive"> {
+ let PrintMethod = "printT2AddrModeImm0_1020s4Operand";
+ let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm0_1020s4";
+ let ParserMatchClass = MemImm0_1020s4OffsetAsmOperand;
+ let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_so_reg := reg + (reg << imm2)
+def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";}
+def t2addrmode_so_reg : MemOperand,
+ ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
+ let PrintMethod = "printT2AddrModeSoRegOperand";
+ let EncoderMethod = "getT2AddrModeSORegOpValue";
+ let DecoderMethod = "DecodeT2AddrModeSOReg";
+ let ParserMatchClass = t2addrmode_so_reg_asmoperand;
+ let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
+}
+
+// Addresses for the TBB/TBH instructions.
+def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; }
+def addrmode_tbb : MemOperand {
+ let PrintMethod = "printAddrModeTBB";
+ let ParserMatchClass = addrmode_tbb_asmoperand;
+ let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
+}
+def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; }
+def addrmode_tbh : MemOperand {
+ let PrintMethod = "printAddrModeTBH";
+ let ParserMatchClass = addrmode_tbh_asmoperand;
+ let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+
+class T2OneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2sOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2OneRegCmpImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2OneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sOneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2OneRegCmpShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2TwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2sTwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegCmp<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+
+class T2TwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2sTwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2TwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2sTwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2ThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2ThreeRegNoP<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : T2XI<oops, iops, itin, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2sThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sTwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2FourReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-23} = 0b111110111;
+ let Inst{22-20} = opc22_20;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = RdHi;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-23} = 0b111110111;
+ let Inst{22-20} = opc22_20;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = RdHi;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+
+/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// binary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0,
+ string wide = ""> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>;
+}
+
+/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
+// the ".w" suffix to indicate that they are wide.
+multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w"> {
+ // Assembler aliases w/ the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
+ // Assembler aliases w/o the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"),
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rd, rGPR:$Rn, t2_so_reg:$shift,
+ pred:$p, cc_out:$s)>;
+
+ // and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm,
+ pred:$p, cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"),
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift,
+ pred:$p, cc_out:$s)>;
+}
+
+/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the T2I_bin_irs counterpart.
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
+
+/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ def ri : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ t2_so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]>;
+ // register
+ def rr : t2PseudoInst<(outs rGPR:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm, pred:$p),
+ 4, iir,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ rGPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ let isCommutable = Commutable;
+ }
+ // shifted register
+ def rs : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
+ 4, iis,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ t2_so_reg:$ShiftedRm))]>,
+ Sched<[WriteALUsi, ReadALUsr]>;
+}
+}
+
+/// T2I_rbin_s_is - Same as T2I_bin_s_irs, except selection DAG
+/// operands are reversed.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass T2I_rbin_s_is<PatFrag opnode> {
+ // shifted imm
+ def ri : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p),
+ 4, IIC_iALUi,
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm,
+ rGPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]>;
+ // shifted register
+ def rs : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
+ 4, IIC_iALUsi,
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm,
+ rGPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]>;
+}
+}
+
+/// T2I_bin_ii12rs - Defines a set of (op reg, {so_imm|imm0_4095|r|so_reg})
+/// patterns for a binary operation that produces a value.
+multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : T2sTwoRegImm<
+ (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ let Inst{15} = 0;
+ }
+ }
+ // 12-bit imm
+ def ri12 : T2I<
+ (outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
+ !strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
+ [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = imm{11};
+ let Inst{25-24} = 0b10;
+ let Inst{23-21} = op23_21;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14-12} = imm{10-8};
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = imm{7-0};
+ }
+ // register
+ def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm),
+ IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ }
+}
+
+/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
+/// for a binary operation that produces a value and use the carry
+/// bit. It's not predicable.
+let Defs = [CPSR], Uses = [CPSR] in {
+multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
+ IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_imm:$imm, CPSR))]>,
+ Requires<[IsThumb2]>, Sched<[WriteALU, ReadALU]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, rGPR:$Rm, CPSR))]>,
+ Requires<[IsThumb2]>, Sched<[WriteALU, ReadALU, ReadALU]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm, CPSR))]>,
+ Requires<[IsThumb2]>, Sched<[WriteALUsi, ReadALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
+}
+
+/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
+// rotate operation that produces a value.
+multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
+ // 5-bit imm
+ def ri : T2sTwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
+ opc, ".w\t$Rd, $Rm, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]>,
+ Sched<[WriteALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-21} = 0b010010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = opcod;
+ }
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-21} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0000;
+ }
+
+ // Optional destination register
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+
+ // Assembler aliases w/o the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+
+ // and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+}
+
+/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to T2I_bin_irs except the instruction does not produce
+/// a explicit result, only implicitly set CPSR.
+multiclass T2I_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode> {
+let isCompare = 1, Defs = [CPSR] in {
+ // shifted imm
+ def ri : T2OneRegCmpImm<
+ (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii,
+ opc, ".w\t$Rn, $imm",
+ [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]>, Sched<[WriteCMP]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ let Inst{11-8} = 0b1111; // Rd
+ }
+ // register
+ def rr : T2TwoRegCmp<
+ (outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir,
+ opc, ".w\t$Rn, $Rm",
+ [(opnode GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{11-8} = 0b1111; // Rd
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2OneRegCmpShiftedReg<
+ (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rn, $ShiftedRm",
+ [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>,
+ Sched<[WriteCMPsi]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{11-8} = 0b1111; // Rd
+ }
+}
+
+ // Assembler aliases w/o the ".w" suffix.
+ // No alias here for 'rr' version as not all instantiations of this
+ // multiclass want one (CMP in particular, does not).
+ def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"),
+ (!cast<Instruction>(NAME#"ri") GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
+ def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"),
+ (!cast<Instruction>(NAME#"rs") GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
+}
+
+/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
+multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, RegisterClass target,
+ PatFrag opnode> {
+ def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = signed;
+ let Inst{23} = 1;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm
+
+ let DecoderMethod = "DecodeT2LoadImm12";
+ }
+ def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> {
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11} = 1;
+ // Offset: index==TRUE, wback==FALSE
+ let Inst{10} = 1; // The P bit.
+ let Inst{9} = addr{8}; // U
+ let Inst{8} = 0; // The W bit.
+ let Inst{7-0} = addr{7-0}; // imm
+
+ let DecoderMethod = "DecodeT2LoadImm8";
+ }
+ def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
+
+ let DecoderMethod = "DecodeT2LoadShift";
+ }
+
+ // pci variant is very similar to i12, but supports negative offsets
+ // from the PC.
+ def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
+ let isReMaterializable = 1;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = 0b1111; // Rn
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt{3-0};
+
+ bits<13> addr;
+ let Inst{23} = addr{12}; // add = (U == '1')
+ let Inst{11-0} = addr{11-0};
+
+ let DecoderMethod = "DecodeT2LoadLabel";
+ }
+}
+
+/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
+multiclass T2I_st<bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, RegisterClass target,
+ PatFrag opnode> {
+ def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(opnode target:$Rt, t2addrmode_imm12:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0001;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<17> addr;
+ let addr{12} = 1; // add = TRUE
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm
+ }
+ def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0000;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+ let Inst{11} = 1;
+ // Offset: index==TRUE, wback==FALSE
+ let Inst{10} = 1; // The P bit.
+ let Inst{8} = 0; // The W bit.
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{9} = addr{8}; // U
+ let Inst{7-0} = addr{7-0}; // imm
+ }
+ def s : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0000;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+ let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
+ }
+}
+
+/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+class T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm$rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+}
+
+// UXTB16 - Requres T2ExtractPack, does not need the .w qualifier.
+class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
+}
+
+// SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern
+// supported yet.
+class T2I_ext_rrot_sxtb16<bits<3> opcod, string opc>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, "\t$Rd, $Rm$rot", []>,
+ Requires<[IsThumb2, HasT2ExtractPack]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
+}
+
+/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode>
+ : T2ThreeReg<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, (rotr rGPR:$Rm,rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
+}
+
+class T2I_exta_rrot_np<bits<3> opcod, string opc>
+ : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : T2XI<oops, iops, itin, asm, pattern> {
+ bits<4> Rd;
+ bits<12> label;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = label{11};
+ let Inst{14-12} = label{10-8};
+ let Inst{7-0} = label{7-0};
+}
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
+ (ins t2adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []>,
+ Sched<[WriteALU, ReadALU]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-24} = 0b10;
+ // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
+ let Inst{22} = 0;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<13> addr;
+ let Inst{11-8} = Rd;
+ let Inst{23} = addr{12};
+ let Inst{21} = addr{12};
+ let Inst{26} = addr{11};
+ let Inst{14-12} = addr{10-8};
+ let Inst{7-0} = addr{7-0};
+
+ let DecoderMethod = "DecodeT2Adr";
+}
+
+let hasSideEffects = 0, isReMaterializable = 1 in
+def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
+ 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
+let hasSideEffects = 1 in
+def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$label, pred:$p),
+ 4, IIC_iALUi,
+ []>, Sched<[WriteALU, ReadALU]>;
+
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR,
+ UnOpFrag<(load node:$Src)>>;
+
+// Loads with zero extension
+defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ GPRnopc, UnOpFrag<(zextloadi16 node:$Src)>>;
+defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ GPRnopc, UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// Loads with sign extension
+defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ GPRnopc, UnOpFrag<(sextloadi16 node:$Src)>>;
+defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ GPRnopc, UnOpFrag<(sextloadi8 node:$Src)>>;
+
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
+// Load doubleword
+def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins t2addrmode_imm8s4:$addr),
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>;
+} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
+
+// zextload i1 -> zextload i8
+def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+// extload -> zextload
+// FIXME: Reduce the number of patterns by legalizing extload to zextload
+// earlier?
+def : T2Pat<(extloadi1 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+def : T2Pat<(extloadi8 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+def : T2Pat<(extloadi16 t2addrmode_imm12:$addr),
+ (t2LDRHi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_negimm8:$addr),
+ (t2LDRHi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
+ (t2LDRHs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
+ (t2LDRHpci tconstpool:$addr)>;
+
+// FIXME: The destination register of the loads and stores can't be PC, but
+// can be SP. We need another regclass (similar to rGPR) to represent
+// that. Not a pressing issue since these are selected manually,
+// not via pattern.
+
+// Indexed loads
+
+let mayLoad = 1, hasSideEffects = 0 in {
+def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8_pre:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+
+def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
+ "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8_pre:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+
+def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8_pre:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+
+def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8_pre:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []>;
+
+def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins t2addrmode_imm8_pre:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []>;
+
+def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+} // mayLoad = 1, hasSideEffects = 0
+
+// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
+// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
+class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt;
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW.
+ let Inst{7-0} = addr{7-0};
+
+ let DecoderMethod = "DecodeT2LoadT";
+}
+
+def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>;
+def t2LDRBT : T2IldT<0, 0b00, "ldrbt", IIC_iLoad_bh_i>;
+def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
+def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
+def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
+
+class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary,
+ opc, asm, "", pattern>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt;
+ bits<4> addr;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-24} = 0b000;
+ let Inst{23-20} = bits23_20;
+ let Inst{11-6} = 0b111110;
+ let Inst{5-4} = bit54;
+ let Inst{3-0} = 0b1111;
+
+ // Encode instruction operands
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt),
+ (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>;
+def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt),
+ (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>;
+def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt),
+ (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>;
+
+// Store
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ rGPR, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+// Store doubleword
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
+def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
+ (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
+
+// Indexed stores
+
+let mayStore = 1, hasSideEffects = 0 in {
+def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "str", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+
+def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "strh", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+
+def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+} // mayStore = 1, hasSideEffects = 0
+
+def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins GPRnopc:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
+ "str", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_store GPRnopc:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strh", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
+// put the patterns on the instruction definitions directly as ISel wants
+// the address base and offset to be separate operands, not a single
+// complex operand like we represent the instructions themselves. The
+// pseudos map between the two.
+let usesCustomInserter = 1,
+ Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in {
+def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+}
+
+// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
+// only.
+// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
+class T2IstT<bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = 0; // not signed
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 0; // store
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = addr{12-9};
+ let Inst{7-0} = addr{7-0};
+}
+
+def t2STRT : T2IstT<0b10, "strt", IIC_iStore_i>;
+def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
+def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
+
+// ldrd / strd pre / post variants
+
+let mayLoad = 1 in
+def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
+ (ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> {
+ let DecoderMethod = "DecodeT2LDRDPreInstruction";
+}
+
+let mayLoad = 1 in
+def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
+ (ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm),
+ IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm",
+ "$addr.base = $wb", []>;
+
+let mayStore = 1 in
+def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
+ (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $wb", []> {
+ let DecoderMethod = "DecodeT2STRDPreInstruction";
+}
+
+let mayStore = 1 in
+def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr,
+ t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm",
+ "$addr.base = $wb", []>;
+
+class T2Istrrel<bits<2> bit54, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc,
+ asm, "", pattern>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt;
+ bits<4> addr;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{11-6} = 0b111110;
+ let Inst{5-4} = bit54;
+ let Inst{3-0} = 0b1111;
+
+ // Encode instruction operands
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+def t2STL : T2Istrrel<0b10, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
+ "stl", "\t$Rt, $addr", []>;
+def t2STLB : T2Istrrel<0b00, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
+ "stlb", "\t$Rt, $addr", []>;
+def t2STLH : T2Istrrel<0b01, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
+ "stlh", "\t$Rt, $addr", []>;
+
+// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
+// data/instruction access.
+// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
+// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1).
+multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
+
+ def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]>,
+ Sched<[WritePreLd]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 1;
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+
+ bits<17> addr;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{11-0} = addr{11-0}; // imm12
+
+ let DecoderMethod = "DecodeT2LoadImm12";
+ }
+
+ def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]>,
+ Sched<[WritePreLd]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // U = 0
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = 0b1100;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ let DecoderMethod = "DecodeT2LoadImm8";
+ }
+
+ def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]>,
+ Sched<[WritePreLd]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // add = TRUE for T1
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-6} = 0b000000;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm2
+
+ let DecoderMethod = "DecodeT2LoadShift";
+ }
+}
+
+defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
+defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
+defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
+
+// pci variant is very similar to i12, but supports negative offsets
+// from the PC. Only PLD and PLI have pci variants (not PLDW)
+class T2Iplpci<bits<1> inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr),
+ IIC_Preload, opc, "\t$addr",
+ [(ARMPreload (ARMWrapper tconstpool:$addr),
+ (i32 0), (i32 inst))]>, Sched<[WritePreLd]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = inst;
+ let Inst{22-20} = 0b001;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = 0b1111;
+
+ bits<13> addr;
+ let Inst{23} = addr{12}; // add = (U == '1')
+ let Inst{11-0} = addr{11-0}; // imm12
+
+ let DecoderMethod = "DecodeT2LoadLabel";
+}
+
+def t2PLDpci : T2Iplpci<0, "pld">, Requires<[IsThumb2]>;
+def t2PLIpci : T2Iplpci<1, "pli">, Requires<[IsThumb2,HasV7]>;
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bit L_bit> {
+ def IA :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def IA_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def DB :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def DB_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+}
+
+let hasSideEffects = 0 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
+
+multiclass thumb2_st_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bit L_bit> {
+ def IA :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def IA_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def DB :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def DB_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+}
+
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
+
+} // hasSideEffects
+
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let hasSideEffects = 0 in
+def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+ "mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0000;
+}
+def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, zero_reg)>;
+def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, CPSR)>;
+def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, CPSR)>;
+
+// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+ AddedComplexity = 1 in
+def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
+ "mov", ".w\t$Rd, $imm",
+ [(set rGPR:$Rd, t2_so_imm:$imm)]>, Sched<[WriteALU]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
+// cc_out is handled as part of the explicit mnemonic in the parser for 'mov'.
+// Use aliases to get that to play nice here.
+def : t2InstAlias<"movs${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, CPSR)>;
+def : t2InstAlias<"movs${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, CPSR)>;
+
+def : t2InstAlias<"mov${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, zero_reg)>;
+def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, zero_reg)>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set rGPR:$Rd, imm0_65535:$imm)]>, Sched<[WriteALU]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+ let DecoderMethod = "DecodeT2MOVTWInstruction";
+}
+
+def : t2InstAlias<"mov${p} $Rd, $imm",
+ (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p)>;
+
+def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def t2MOVTi16 : T2I<(outs rGPR:$Rd),
+ (ins rGPR:$src, imm0_65535_expr:$imm), IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set rGPR:$Rd,
+ (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]>,
+ Sched<[WriteALU]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0110;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+ let DecoderMethod = "DecodeT2MOVTWInstruction";
+}
+
+def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+ Sched<[WriteALU]>;
+} // Constraints
+
+def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+def t2SXTB : T2I_ext_rrot<0b100, "sxtb",
+ UnOpFrag<(sext_inreg node:$Src, i8)>>;
+def t2SXTH : T2I_ext_rrot<0b000, "sxth",
+ UnOpFrag<(sext_inreg node:$Src, i16)>>;
+def t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
+
+def t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+def t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
+
+// A simple right-shift can also be used in most cases (the exception is the
+// SXTH operations with a rotate of 24: there the non-contiguous bits are
+// relevant).
+def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+def t2UXTB : T2I_ext_rrot<0b101, "uxtb",
+ UnOpFrag<(and node:$Src, 0x000000FF)>>;
+def t2UXTH : T2I_ext_rrot<0b001, "uxth",
+ UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
+ UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
+// (t2UXTB16 rGPR:$Src, 3)>,
+// Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
+ (t2UXTB16 rGPR:$Src, 1)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+def t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
+
+def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm t2ADD : T2I_bin_ii12rs<0b000, "add",
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
+//
+// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by
+// AdjustInstrPostInstrSelection where we determine whether or not to
+// set the "s" bit based on CPSR liveness.
+//
+// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
+// support for an optional CPSR definition that corresponds to the DAG
+// node's second value. We can then eliminate the implicit def of CPSR.
+defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
+defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+
+let hasPostISelHook = 1 in {
+defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
+defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
+}
+
+// RSB
+defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// FIXME: Eliminate them if we can write def : Pat patterns which defines
+// CPSR and the implicit def of CPSR is not needed.
+defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+// The AddedComplexity preferences the first variant over the others since
+// it can be shrunk to a 16-bit wide encoding, while the others cannot.
+let AddedComplexity = 1 in
+def : T2Pat<(add GPR:$src, imm1_255_neg:$imm),
+ (t2SUBri GPR:$src, imm1_255_neg:$imm)>;
+def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
+ (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm),
+ (t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
+
+let AddedComplexity = 1 in
+def : T2Pat<(ARMaddc rGPR:$src, imm1_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm1_255_neg:$imm)>;
+def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm),
+ (t2SUBSrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+let AddedComplexity = 1 in
+def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR),
+ (t2SBCri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR),
+ (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
+ (t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>;
+
+// Select Bytes -- for disassembly only
+
+def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b010;
+ let Inst{23} = 0b1;
+ let Inst{22-20} = 0b010;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 0b1;
+ let Inst{6-4} = 0b000;
+}
+
+// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
+// And Miscellaneous operations -- for disassembly only
+class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
+ list<dag> pat = [/* For disassembly only; pattern left blank */],
+ dag iops = (ins rGPR:$Rn, rGPR:$Rm),
+ string asm = "\t$Rd, $Rn, $Rm">
+ : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0101;
+ let Inst{22-20} = op22_20;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = op7_4;
+
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+// Saturating add/subtract -- for disassembly only
+
+def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
+ [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
+def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
+def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
+def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
+def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
+ [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
+def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
+def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
+def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">;
+def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">;
+def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">;
+def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">;
+def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def t2SASX : T2I_pam<0b010, 0b0000, "sasx">;
+def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">;
+def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">;
+def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">;
+def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">;
+def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">;
+def t2UASX : T2I_pam<0b010, 0b0100, "uasx">;
+def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">;
+def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">;
+def t2USAX : T2I_pam<0b110, 0b0100, "usax">;
+def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">;
+def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">;
+def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">;
+def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">;
+def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">;
+def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">;
+def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">;
+def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">;
+def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">;
+def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">;
+def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">;
+def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">;
+def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
+
+// Helper class for disassembly only
+// A6.3.16 & A6.3.17
+// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions.
+class T2ThreeReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2FourReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+// Unsigned Sum of Absolute Differences [and Accumulate].
+def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
+ "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasDSP]>;
+
+// Signed/Unsigned saturate.
+class T2SatI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> sat_imm;
+ bits<7> sh;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{4-0} = sat_imm;
+ let Inst{21} = sh{5};
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
+
+def t2SSAT: T2SatI<
+ (outs rGPR:$Rd),
+ (ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{5} = 0;
+}
+
+def t2SSAT16: T2SatI<
+ (outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
+ "ssat16", "\t$Rd, $sat_imm, $Rn", []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+ let Inst{5-4} = 0b00;
+}
+
+def t2USAT: T2SatI<
+ (outs rGPR:$Rd),
+ (ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+}
+
+def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
+ NoItinerary,
+ "usat16", "\t$Rd, $sat_imm, $Rn", []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-22} = 0b1111001110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+ let Inst{5-4} = 0b00;
+}
+
+def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Shift and rotate Instructions.
+//
+
+defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31,
+ BinOpFrag<(shl node:$LHS, node:$RHS)>>;
+defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr,
+ BinOpFrag<(srl node:$LHS, node:$RHS)>>;
+defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr,
+ BinOpFrag<(sra node:$LHS, node:$RHS)>>;
+defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31,
+ BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
+
+// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
+def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
+ (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
+
+let Uses = [CPSR] in {
+def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "rrx", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]>, Sched<[WriteALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0011;
+}
+}
+
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+def t2MOVsrl_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "lsrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 1; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = 0b01; // Shift type.
+ // Shift amount = Inst{14-12:7-6} = 1.
+ let Inst{14-12} = 0b000;
+ let Inst{7-6} = 0b01;
+}
+def t2MOVsra_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "asrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 1; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = 0b10; // Shift type.
+ // Shift amount = Inst{14-12:7-6} = 1.
+ let Inst{14-12} = 0b000;
+ let Inst{7-6} = 0b01;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm t2AND : T2I_bin_w_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
+defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+
+defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+
+class T2BitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<5> msb;
+ bits<5> lsb;
+
+ let Inst{11-8} = Rd;
+ let Inst{4-0} = msb{4-0};
+ let Inst{14-12} = lsb{4-2};
+ let Inst{7-6} = lsb{1-0};
+}
+
+class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2BitFI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+
+ let Inst{19-16} = Rn;
+}
+
+let Constraints = "$src = $Rd" in
+def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
+ IIC_iUNAsi, "bfc", "\t$Rd, $imm",
+ [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+}
+
+def t2SBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
+ IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10100;
+ let Inst{15} = 0;
+}
+
+def t2UBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
+ IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b11100;
+ let Inst{15} = 0;
+}
+
+// A8.8.247 UDF - Undefined (Encoding T2)
+def t2UDF : T2XI<(outs), (ins imm0_65535:$imm16), IIC_Br, "udf.w\t$imm16",
+ [(int_arm_undefined imm0_65535:$imm16)]> {
+ bits<16> imm16;
+ let Inst{31-29} = 0b111;
+ let Inst{28-27} = 0b10;
+ let Inst{26-20} = 0b1111111;
+ let Inst{19-16} = imm16{15-12};
+ let Inst{15} = 0b1;
+ let Inst{14-12} = 0b010;
+ let Inst{11-0} = imm16{11-0};
+}
+
+// A8.6.18 BFI - Bitfield insert (Encoding T1)
+let Constraints = "$src = $Rd" in {
+ def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd),
+ (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm),
+ IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
+ bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+ }
+}
+
+defm t2ORN : T2I_bin_irs<0b0011, "orn",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">;
+
+/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// unary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_un_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode,
+ bit Cheap = 0, bit ReMat = 0, bit MoveImm = 0> {
+ // shifted imm
+ def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]>, Sched<[WriteALU]> {
+ let isAsCheapAsAMove = Cheap;
+ let isReMaterializable = ReMat;
+ let isMoveImm = MoveImm;
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ }
+ // register
+ def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]>, Sched<[WriteALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rd, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]>,
+ Sched<[WriteALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ }
+}
+
+// Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version
+let AddedComplexity = 1 in
+defm t2MVN : T2I_un_irs <0b0011, "mvn",
+ IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
+ UnOpFrag<(not node:$Src)>, 1, 1, 1>;
+
+let AddedComplexity = 1 in
+def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
+
+// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
+def top16Zero: PatLeaf<(i32 rGPR:$src), [{
+ return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+ }]>;
+
+// so_imm_notSext is needed instead of so_imm_not, as the value of imm
+// will match the extended, not the original bitWidth for $src.
+def : T2Pat<(and top16Zero:$src, t2_so_imm_notSext:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_notSext:$imm)>;
+
+
+// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
+def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
+ (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
+ Requires<[IsThumb2]>;
+
+def : T2Pat<(t2_so_imm_not:$src),
+ (t2MVNi t2_so_imm_not:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+let isCommutable = 1 in
+def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "mul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0000; // Multiply
+}
+
+def t2MLA: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>,
+ Requires<[IsThumb2, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{7-4} = 0b0000; // Multiply
+}
+
+def t2MLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>,
+ Requires<[IsThumb2, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{7-4} = 0b0001; // Multiply and Subtract
+}
+
+// Extra precision multiplies with low / high results
+let hasSideEffects = 0 in {
+let isCommutable = 1 in {
+def t2SMULL : T2MulLong<0b000, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMULL : T2MulLong<0b010, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+} // isCommutable
+
+// Multiply + accumulate
+def t2SMLAL : T2MlaLong<0b100, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
+
+def t2UMLAL : T2MlaLong<0b110, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
+
+def t2UMAAL : T2MulLong<0b110, 0b0110,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]>;
+} // hasSideEffects
+
+// Rounding variants of the below included for disassembly only
+
+// Most significant word multiply
+def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmulr", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+def t2SMMLA : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMLAR: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+def t2SMMLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMLSR:T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+multiclass T2I_smul<string opc, PatFrag opnode> {
+ def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16)))]>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16))))]>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+
+ def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16)))]>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b10;
+ }
+
+ def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16))))]>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b11;
+ }
+
+ def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+}
+
+
+multiclass T2I_smla<string opc, PatFrag opnode> {
+ def BB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra,
+ (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16))))]>,
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def BT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16)))))]>,
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+
+ def TB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16))))]>,
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b10;
+ }
+
+ def TT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16)))))]>,
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b11;
+ }
+
+ def WB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ []>,
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def WT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ []>,
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+}
+
+defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiple accumulate long: SMLAL<x><y>
+def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasDSP]>;
+
+// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+def t2SMUAD: T2ThreeReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUADX:T2ThreeReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSD: T2ThreeReg_mac<
+ 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSDX:T2ThreeReg_mac<
+ 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMLAD : T2FourReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLADX : T2FourReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]>;
+
+//===----------------------------------------------------------------------===//
+// Division Instructions.
+// Signed and unsigned division on v7-M
+//
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011100;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011101;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-22} = 0b01010;
+ let Inst{21-20} = op1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-6} = 0b10;
+ let Inst{5-4} = op2;
+ let Rn{3-0} = Rm;
+}
+
+def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>,
+ Sched<[WriteALU]>;
+
+def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rbit", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (bitreverse rGPR:$Rm))]>,
+ Sched<[WriteALU]>;
+
+def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>,
+ Sched<[WriteALU]>;
+
+def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev16", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>,
+ Sched<[WriteALU]>;
+
+def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "revsh", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>,
+ Sched<[WriteALU]>;
+
+def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
+ (t2REVSH rGPR:$Rm)>;
+
+def t2PKHBT : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_lsl_amt:$sh),
+ IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
+ (and (shl rGPR:$Rm, pkh_lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = 0b01100;
+ let Inst{5} = 0; // BT form
+ let Inst{4} = 0;
+
+ bits<5> sh;
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def t2PKHTB : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_asr_amt:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
+ (and (sra rGPR:$Rm, pkh_asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]>,
+ Sched<[WriteALUsi, ReadALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = 0b01100;
+ let Inst{5} = 1; // TB form
+ let Inst{4} = 0;
+
+ bits<5> sh;
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+// We also can not replace a srl (17..31) by an arithmetic shift we would use in
+// pkhtb src1, src2, asr (17..31).
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, imm16:$sh)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (sra rGPR:$src2, imm16_31:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
+ (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$sh)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+//===----------------------------------------------------------------------===//
+// CRC32 Instructions
+//
+// Polynomials:
+// + CRC32{B,H,W} 0x04C11DB7
+// + CRC32C{B,H,W} 0x1EDC6F41
+//
+
+class T2I_crc32<bit C, bits<2> sz, string suffix, SDPatternOperator builtin>
+ : T2ThreeRegNoP<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), NoItinerary,
+ !strconcat("crc32", suffix, "\t$Rd, $Rn, $Rm"),
+ [(set rGPR:$Rd, (builtin rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2, HasV8, HasCRC]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b010110;
+ let Inst{20} = C;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-6} = 0b10;
+ let Inst{5-4} = sz;
+}
+
+def t2CRC32B : T2I_crc32<0, 0b00, "b", int_arm_crc32b>;
+def t2CRC32CB : T2I_crc32<1, 0b00, "cb", int_arm_crc32cb>;
+def t2CRC32H : T2I_crc32<0, 0b01, "h", int_arm_crc32h>;
+def t2CRC32CH : T2I_crc32<1, 0b01, "ch", int_arm_crc32ch>;
+def t2CRC32W : T2I_crc32<0, 0b10, "w", int_arm_crc32w>;
+def t2CRC32CW : T2I_crc32<1, 0b10, "cw", int_arm_crc32cw>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm),
+ (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, rGPR:$rhs),
+ (t2CMPrr GPRnopc:$lhs, rGPR:$rhs)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_reg:$rhs),
+ (t2CMPrs GPRnopc:$lhs, t2_so_reg:$rhs)>;
+
+let isCompare = 1, Defs = [CPSR] in {
+ // shifted imm
+ def t2CMNri : T2OneRegCmpImm<
+ (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iCMPi,
+ "cmn", ".w\t$Rn, $imm",
+ [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]>,
+ Sched<[WriteCMP, ReadALU]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b1000;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ let Inst{11-8} = 0b1111; // Rd
+ }
+ // register
+ def t2CMNzrr : T2TwoRegCmp<
+ (outs), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iCMPr,
+ "cmn", ".w\t$Rn, $Rm",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b1000;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{11-8} = 0b1111; // Rd
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def t2CMNzrs : T2OneRegCmpShiftedReg<
+ (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iCMPsi,
+ "cmn", ".w\t$Rn, $ShiftedRm",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>,
+ Sched<[WriteCMPsi, ReadALU, ReadALU]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b1000;
+ let Inst{20} = 1; // The S bit.
+ let Inst{11-8} = 0b1111; // Rd
+ }
+}
+
+// Assembler aliases w/o the ".w" suffix.
+// No alias here for 'rr' version as not all instantiations of this multiclass
+// want one (CMP in particular, does not).
+def : t2InstAlias<"cmn${p} $Rn, $imm",
+ (t2CMNri GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rn, $shift",
+ (t2CMNzrs GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
+
+def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
+ (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+
+def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
+ (t2CMNri GPRnopc:$src, t2_so_imm_neg:$imm)>;
+
+defm t2TST : T2I_cmp_irs<0b0000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
+defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
+
+// Conditional moves
+let hasSideEffects = 0 in {
+
+let isCommutable = 1, isSelect = 1 in
+def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, cmovpred:$p),
+ 4, IIC_iCMOVr,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+let isMoveImm = 1 in
+def t2MOVCCi
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false,t2_so_imm:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+let isCodeGenOnly = 1 in {
+let isMoveImm = 1 in
+def t2MOVCCi16
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false, imm0_65535:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+let isMoveImm = 1 in
+def t2MVNCCi
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set rGPR:$Rd,
+ (ARMcmov rGPR:$false, t2_so_imm_not:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+class MOVCCShPseudo<SDPatternOperator opnode, Operand ty>
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVsi,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false,
+ (opnode rGPR:$Rm, (i32 ty:$imm)),
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+def t2MOVCClsl : MOVCCShPseudo<shl, imm0_31>;
+def t2MOVCClsr : MOVCCShPseudo<srl, imm_sr>;
+def t2MOVCCasr : MOVCCShPseudo<sra, imm_sr>;
+def t2MOVCCror : MOVCCShPseudo<rotr, imm0_31>;
+
+let isMoveImm = 1 in
+def t2MOVCCi32imm
+ : t2PseudoInst<(outs rGPR:$dst),
+ (ins rGPR:$false, i32imm:$src, cmovpred:$p),
+ 8, IIC_iCMOVix2,
+ [(set rGPR:$dst, (ARMcmov rGPR:$false, imm:$src,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $dst">;
+} // isCodeGenOnly = 1
+
+} // hasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
+ "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f5;
+ let Inst{3-0} = opt;
+}
+
+def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
+ "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f4;
+ let Inst{3-0} = opt;
+}
+
+def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary,
+ "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f6;
+ let Inst{3-0} = opt;
+}
+}
+
+class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern, bits<4> rt2 = 0b1111>
+ : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{11-8} = rt2;
+ let Inst{7-4} = opcod;
+ let Inst{3-0} = 0b1111;
+
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+class T2I_strex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern, bits<4> rt2 = 0b1111>
+ : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{11-8} = rt2;
+ let Inst{7-4} = opcod;
+
+ bits<4> Rd;
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{3-0} = Rd;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+let mayLoad = 1 in {
+def t2LDREXB : T2I_ldrex<0b0100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexb", "\t$Rt, $addr", "",
+ [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>;
+def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexh", "\t$Rt, $addr", "",
+ [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>;
+def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrex", "\t$Rt, $addr", "",
+ [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]> {
+ bits<4> Rt;
+ bits<12> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000101;
+ let Inst{19-16} = addr{11-8};
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = addr{7-0};
+}
+let hasExtraDefRegAllocReq = 1 in
+def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexd", "\t$Rt, $Rt2, $addr", "",
+ [], {?, ?, ?, ?}>,
+ Requires<[IsThumb2, IsNotMClass]> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaexb", "\t$Rt, $addr", "",
+ [(set rGPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>,
+ Requires<[IsThumb, HasV8]>;
+def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaexh", "\t$Rt, $addr", "",
+ [(set rGPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>,
+ Requires<[IsThumb, HasV8]>;
+def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaex", "\t$Rt, $addr", "",
+ [(set rGPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>,
+ Requires<[IsThumb, HasV8]> {
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = 0b11101111;
+}
+let hasExtraDefRegAllocReq = 1 in
+def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaexd", "\t$Rt, $Rt2, $addr", "",
+ [], {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+
+ let Inst{7} = 1;
+}
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexb", "\t$Rd, $Rt, $addr", "",
+ [(set rGPR:$Rd,
+ (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>;
+def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexh", "\t$Rd, $Rt, $addr", "",
+ [(set rGPR:$Rd,
+ (strex_2 rGPR:$Rt, addr_offset_none:$addr))]>;
+
+def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
+ t2addrmode_imm0_1020s4:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strex", "\t$Rd, $Rt, $addr", "",
+ [(set rGPR:$Rd,
+ (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<12> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000100;
+ let Inst{19-16} = addr{11-8};
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = addr{7-0};
+}
+let hasExtraSrcRegAllocReq = 1 in
+def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
+ {?, ?, ?, ?}>,
+ Requires<[IsThumb2, IsNotMClass]> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlexb", "\t$Rd, $Rt, $addr", "",
+ [(set rGPR:$Rd,
+ (stlex_1 rGPR:$Rt, addr_offset_none:$addr))]>,
+ Requires<[IsThumb, HasV8]>;
+
+def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlexh", "\t$Rd, $Rt, $addr", "",
+ [(set rGPR:$Rd,
+ (stlex_2 rGPR:$Rt, addr_offset_none:$addr))]>,
+ Requires<[IsThumb, HasV8]>;
+
+def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
+ addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlex", "\t$Rd, $Rt, $addr", "",
+ [(set rGPR:$Rd,
+ (stlex_4 rGPR:$Rt, addr_offset_none:$addr))]>,
+ Requires<[IsThumb, HasV8]> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-4} = 0b11111110;
+ let Inst{3-0} = Rd;
+}
+let hasExtraSrcRegAllocReq = 1 in
+def t2STLEXD : T2I_strex<0b1111, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
+ {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+}
+
+def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-16} = 0xf3bf;
+ let Inst{15-14} = 0b10;
+ let Inst{13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = 0b0010;
+ let Inst{3-0} = 0b1111;
+}
+
+def : T2Pat<(and (ldrex_1 addr_offset_none:$addr), 0xff),
+ (t2LDREXB addr_offset_none:$addr)>;
+def : T2Pat<(and (ldrex_2 addr_offset_none:$addr), 0xffff),
+ (t2LDREXH addr_offset_none:$addr)>;
+def : T2Pat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr),
+ (t2STREXB GPR:$Rt, addr_offset_none:$addr)>;
+def : T2Pat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
+ (t2STREXH GPR:$Rt, addr_offset_none:$addr)>;
+
+def : T2Pat<(and (ldaex_1 addr_offset_none:$addr), 0xff),
+ (t2LDAEXB addr_offset_none:$addr)>;
+def : T2Pat<(and (ldaex_2 addr_offset_none:$addr), 0xffff),
+ (t2LDAEXH addr_offset_none:$addr)>;
+def : T2Pat<(stlex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr),
+ (t2STLEXB GPR:$Rt, addr_offset_none:$addr)>;
+def : T2Pat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
+ (t2STLEXH GPR:$Rt, addr_offset_none:$addr)>;
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved resgisters, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, NoVFP]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Control-Flow Instructions
+//
+
+// FIXME: remove when we have a way to marking a MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ 4, IIC_iLoad_mBr, [],
+ (t2LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
+ RegConstraint<"$Rn = $wb">;
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+let isPredicable = 1 in
+def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
+ "b", ".w\t$target",
+ [(br bb:$target)]>, Sched<[WriteBr]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 1;
+
+ bits<24> target;
+ let Inst{26} = target{23};
+ let Inst{13} = target{22};
+ let Inst{11} = target{21};
+ let Inst{25-16} = target{20-11};
+ let Inst{10-0} = target{10-0};
+ let DecoderMethod = "DecodeT2BInstruction";
+ let AsmMatchConverter = "cvtThumbBranches";
+}
+
+let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
+def t2BR_JT : t2PseudoInst<(outs),
+ (ins GPR:$target, GPR:$index, i32imm:$jt),
+ 0, IIC_Br,
+ [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt)]>,
+ Sched<[WriteBr]>;
+
+// FIXME: Add a case that can be predicated.
+def t2TBB_JT : t2PseudoInst<(outs),
+ (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
+ Sched<[WriteBr]>;
+
+def t2TBH_JT : t2PseudoInst<(outs),
+ (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
+ Sched<[WriteBr]>;
+
+def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
+ "tbb", "\t$addr", []>, Sched<[WriteBrTbl]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 0; // B form
+ let Inst{3-0} = Rm;
+
+ let DecoderMethod = "DecodeThumbTableBranch";
+}
+
+def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br,
+ "tbh", "\t$addr", []>, Sched<[WriteBrTbl]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 1; // H form
+ let Inst{3-0} = Rm;
+
+ let DecoderMethod = "DecodeThumbTableBranch";
+}
+} // isNotDuplicable, isIndirectBranch
+
+} // isBranch, isTerminator, isBarrier
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects ", "two operands. :(
+let isBranch = 1, isTerminator = 1 in
+def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
+ "b", ".w\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]>, Sched<[WriteBr]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+
+ bits<4> p;
+ let Inst{25-22} = p;
+
+ bits<21> target;
+ let Inst{26} = target{20};
+ let Inst{11} = target{19};
+ let Inst{13} = target{18};
+ let Inst{21-16} = target{17-12};
+ let Inst{10-0} = target{11-1};
+
+ let DecoderMethod = "DecodeThumb2BCCInstruction";
+ let AsmMatchConverter = "cvtThumbBranches";
+}
+
+// Tail calls. The MachO version of thumb tail calls uses a t2 branch, so
+// it goes here.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // IOS version.
+ let Uses = [SP] in
+ def tTAILJMPd: tPseudoExpand<(outs),
+ (ins uncondbrtarget:$dst, pred:$p),
+ 4, IIC_Br, [],
+ (t2B uncondbrtarget:$dst, pred:$p)>,
+ Requires<[IsThumb2, IsMachO]>, Sched<[WriteBr]>;
+}
+
+// IT block
+let Defs = [ITSTATE] in
+def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
+ AddrModeNone, 2, IIC_iALUx,
+ "it$mask\t$cc", "", []>,
+ ComplexDeprecationPredicate<"IT"> {
+ // 16-bit instruction.
+ let Inst{31-16} = 0x0000;
+ let Inst{15-8} = 0b10111111;
+
+ bits<4> cc;
+ bits<4> mask;
+ let Inst{7-4} = cc;
+ let Inst{3-0} = mask;
+
+ let DecoderMethod = "DecodeIT";
+}
+
+// Branch and Exchange Jazelle -- for disassembly only
+// Rm = Inst{19-16}
+def t2BXJ : T2I<(outs), (ins GPRnopc:$func), NoItinerary, "bxj", "\t$func", []>,
+ Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass]> {
+ bits<4> func;
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111100;
+ let Inst{19-16} = func;
+ let Inst{15-0} = 0b1000111100000000;
+}
+
+// Compare and branch on zero / non-zero
+let isBranch = 1, isTerminator = 1 in {
+ def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbz\t$Rn, $target", []>,
+ T1Misc<{0,0,?,1,?,?,?}>,
+ Requires<[IsThumb2]>, Sched<[WriteBr]> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+
+ def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbnz\t$Rn, $target", []>,
+ T1Misc<{1,0,?,1,?,?,?}>,
+ Requires<[IsThumb2]>, Sched<[WriteBr]> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+}
+
+
+// Change Processor State is a system instruction.
+// FIXME: Since the asm parser has currently no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
+ !strconcat("cps", asm_op), []>,
+ Requires<[IsThumb2, IsNotMClass]> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
+ let Inst{31-11} = 0b111100111010111110000;
+ let Inst{10-9} = imod;
+ let Inst{8} = M;
+ let Inst{7-5} = iflags;
+ let Inst{4-0} = mode;
+ let DecoderMethod = "DecodeT2CPSInstruction";
+}
+
+let M = 1 in
+ def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ "$imod\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
+ "$imod.w\t$iflags">;
+let imod = 0, iflags = 0, M = 1 in
+ def t2CPS1p : t2CPS<(ins imm0_31:$mode), "\t$mode">;
+
+def : t2InstAlias<"cps$imod.w $iflags, $mode",
+ (t2CPS3p imod_op:$imod, iflags_op:$iflags, i32imm:$mode), 0>;
+def : t2InstAlias<"cps.w $mode", (t2CPS1p imm0_31:$mode), 0>;
+
+// A6.3.4 Branches and miscellaneous control
+// Table A6-14 Change Processor State, and hint instructions
+def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",
+ [(int_arm_hint imm0_239:$imm)]> {
+ bits<8> imm;
+ let Inst{31-3} = 0b11110011101011111000000000000;
+ let Inst{7-0} = imm;
+}
+
+def : t2InstAlias<"hint$p $imm", (t2HINT imm0_239:$imm, pred:$p)>;
+def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
+def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
+def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
+def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>;
+def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>;
+def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p)> {
+ let Predicates = [IsThumb2, HasV8];
+}
+
+def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt",
+ [(int_arm_dbg imm0_15:$opt)]> {
+ bits<4> opt;
+ let Inst{31-20} = 0b111100111010;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-8} = 0b10000000;
+ let Inst{7-4} = 0b1111;
+ let Inst{3-0} = opt;
+}
+
+// Secure Monitor Call is a system instruction.
+// Option = Inst{19-16}
+def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
+ []>, Requires<[IsThumb2, HasTrustZone]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111111;
+ let Inst{15-12} = 0b1000;
+
+ bits<4> opt;
+ let Inst{19-16} = opt;
+}
+
+class T2DCPS<bits<2> opt, string opc>
+ : T2I<(outs), (ins), NoItinerary, opc, "", []>, Requires<[IsThumb2, HasV8]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111000;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-2} = 0b0000000000;
+ let Inst{1-0} = opt;
+}
+
+def t2DCPS1 : T2DCPS<0b01, "dcps1">;
+def t2DCPS2 : T2DCPS<0b10, "dcps2">;
+def t2DCPS3 : T2DCPS<0b11, "dcps3">;
+
+class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsThumb2,IsNotMClass]> {
+ bits<5> mode;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24-23} = Op;
+ let Inst{22} = 0;
+ let Inst{21} = W;
+ let Inst{20-16} = 0b01101;
+ let Inst{15-5} = 0b11000000000;
+ let Inst{4-0} = mode{4-0};
+}
+
+// Store Return State is a system instruction.
+def t2SRSDB_UPD : T2SRS<0b00, 1, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsdb", "\tsp!, $mode", []>;
+def t2SRSDB : T2SRS<0b00, 0, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsdb","\tsp, $mode", []>;
+def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsia","\tsp!, $mode", []>;
+def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsia","\tsp, $mode", []>;
+
+
+def : t2InstAlias<"srsdb${p} $mode", (t2SRSDB imm0_31:$mode, pred:$p)>;
+def : t2InstAlias<"srsdb${p} $mode!", (t2SRSDB_UPD imm0_31:$mode, pred:$p)>;
+
+def : t2InstAlias<"srsia${p} $mode", (t2SRSIA imm0_31:$mode, pred:$p)>;
+def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>;
+
+// Return From Exception is a system instruction.
+class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsThumb2,IsNotMClass]> {
+ let Inst{31-20} = op31_20{11-0};
+
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = 0xc000;
+}
+
+def t2RFEDBW : T2RFE<0b111010000011,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEDB : T2RFE<0b111010000001,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIAW : T2RFE<0b111010011011,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIA : T2RFE<0b111010011001,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+
+// B9.3.19 SUBS PC, LR, #imm (Thumb2) system instruction.
+// Exception return instruction is "subs pc, lr, #imm".
+let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
+def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary,
+ "subs", "\tpc, lr, $imm",
+ [(ARMintretflag imm0_255:$imm)]>,
+ Requires<[IsThumb2,IsNotMClass]> {
+ let Inst{31-8} = 0b111100111101111010001111;
+
+ bits<8> imm;
+ let Inst{7-0} = imm;
+}
+
+// Hypervisor Call is a system instruction.
+let isCall = 1 in {
+def t2HVC : T2XI <(outs), (ins imm0_65535:$imm16), IIC_Br, "hvc.w\t$imm16", []>,
+ Requires<[IsThumb2, HasVirtualization]>, Sched<[WriteBr]> {
+ bits<16> imm16;
+ let Inst{31-20} = 0b111101111110;
+ let Inst{19-16} = imm16{15-12};
+ let Inst{15-12} = 0b1000;
+ let Inst{11-0} = imm16{11-0};
+}
+}
+
+// Alias for HVC without the ".w" optional width specifier
+def : t2InstAlias<"hvc\t$imm16", (t2HVC imm0_65535:$imm16)>;
+
+// ERET - Return from exception in Hypervisor mode.
+// B9.3.3, B9.3.20: ERET is an alias for "SUBS PC, LR, #0" in an implementation that
+// includes virtualization extensions.
+def t2ERET : InstAlias<"eret${p}", (t2SUBS_PC_LR 0, pred:$p)>,
+ Requires<[IsThumb2, HasVirtualization]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// 32-bit immediate using movw + movt.
+// This is a single pseudo instruction to make it re-materializable.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set rGPR:$dst, (i32 imm:$src))]>,
+ Requires<[IsThumb, UseMovt]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if pic).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
+let isReMaterializable = 1 in {
+def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+
+}
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def : T2Pat<(ARMWrapperJT tjumptable:$dst),
+ (t2LEApcrelJT tjumptable:$dst)>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ IIC_iLoadiALU,
+ [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb2]>;
+
+// Pseudo isntruction that combines movs + predicated rsbmi
+// to implement integer ABS
+let usesCustomInserter = 1, Defs = [CPSR] in {
+def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
+ NoItinerary, []>, Requires<[IsThumb2]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Coprocessor load/store -- for disassembly only
+//
+class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm>
+ : T2I<oops, iops, NoItinerary, opc, asm, []> {
+ let Inst{31-28} = op31_28;
+ let Inst{27-25} = 0b110;
+}
+
+multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> {
+ def _OFFSET : T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _PRE : T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
+ asm, "\t$cop, $CRd, $addr!"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _POST: T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset"> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _OPTION : T2CI<op31_28, (outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+}
+
+defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">;
+defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">;
+defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">;
+defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">;
+defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8,IsThumb2]>;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8,IsThumb2]>;
+defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8,IsThumb2]>;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8,IsThumb2]>;
+
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register -- for disassembly only
+//
+// Move to ARM core register from Special Register
+
+// A/R class MRS.
+//
+// A/R class can only move from CPSR or SPSR.
+def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr",
+ []>, Requires<[IsThumb2,IsNotMClass]> {
+ bits<4> Rd;
+ let Inst{31-12} = 0b11110011111011111000;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = 0b00000000;
+}
+
+def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>;
+
+def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
+ []>, Requires<[IsThumb2,IsNotMClass]> {
+ bits<4> Rd;
+ let Inst{31-12} = 0b11110011111111111000;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = 0b00000000;
+}
+
+def t2MRSbanked : T2I<(outs rGPR:$Rd), (ins banked_reg:$banked),
+ NoItinerary, "mrs", "\t$Rd, $banked", []>,
+ Requires<[IsThumb, HasVirtualization]> {
+ bits<6> banked;
+ bits<4> Rd;
+
+ let Inst{31-21} = 0b11110011111;
+ let Inst{20} = banked{5}; // R bit
+ let Inst{19-16} = banked{3-0};
+ let Inst{15-12} = 0b1000;
+ let Inst{11-8} = Rd;
+ let Inst{7-5} = 0b001;
+ let Inst{4} = banked{4};
+ let Inst{3-0} = 0b0000;
+}
+
+
+// M class MRS.
+//
+// This MRS has a mask field in bits 7-0 and can take more values than
+// the A/R class (a full msr_mask).
+def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$SYSm), NoItinerary,
+ "mrs", "\t$Rd, $SYSm", []>,
+ Requires<[IsThumb,IsMClass]> {
+ bits<4> Rd;
+ bits<8> SYSm;
+ let Inst{31-12} = 0b11110011111011111000;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = SYSm;
+
+ let Unpredictable{20-16} = 0b11111;
+ let Unpredictable{13} = 0b1;
+}
+
+
+// Move from ARM core register to Special Register
+//
+// A/R class MSR.
+//
+// No need to have both system and application versions, the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
+// the mask with the fields to be accessed in the special register.
+def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
+ NoItinerary, "msr", "\t$mask, $Rn", []>,
+ Requires<[IsThumb2,IsNotMClass]> {
+ bits<5> mask;
+ bits<4> Rn;
+ let Inst{31-21} = 0b11110011100;
+ let Inst{20} = mask{4}; // R Bit
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-8} = mask{3-0};
+ let Inst{7-0} = 0;
+}
+
+// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a
+// separate encoding (distinguished by bit 5.
+def t2MSRbanked : T2I<(outs), (ins banked_reg:$banked, rGPR:$Rn),
+ NoItinerary, "msr", "\t$banked, $Rn", []>,
+ Requires<[IsThumb, HasVirtualization]> {
+ bits<6> banked;
+ bits<4> Rn;
+
+ let Inst{31-21} = 0b11110011100;
+ let Inst{20} = banked{5}; // R bit
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-8} = banked{3-0};
+ let Inst{7-5} = 0b001;
+ let Inst{4} = banked{4};
+ let Inst{3-0} = 0b0000;
+}
+
+
+// M class MSR.
+//
+// Move from ARM core register to Special Register
+def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn),
+ NoItinerary, "msr", "\t$SYSm, $Rn", []>,
+ Requires<[IsThumb,IsMClass]> {
+ bits<12> SYSm;
+ bits<4> Rn;
+ let Inst{31-21} = 0b11110011100;
+ let Inst{20} = 0b0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-10} = SYSm{11-10};
+ let Inst{9-8} = 0b00;
+ let Inst{7-0} = SYSm{7-0};
+
+ let Unpredictable{20} = 0b1;
+ let Unpredictable{13} = 0b1;
+ let Unpredictable{9-8} = 0b11;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register
+//
+
+class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : T2Cop<Op, oops, iops, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2",
+ pattern> {
+ let Inst{27-24} = 0b1110;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+class t2MovRRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern = []>
+ : T2Cop<Op, oops, iops, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
+ let Inst{27-24} = 0b1100;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+/* from ARM core register to coprocessor */
+def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>,
+ ComplexDeprecationPredicate<"MCR">;
+def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
+def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
+ (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Predicates = [IsThumb2, PreV8];
+}
+def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
+
+/* from coprocessor to ARM core register */
+def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
+ (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
+
+def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
+ (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2), []> {
+ let Predicates = [IsThumb2, PreV8];
+}
+def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
+
+def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+
+/* from ARM core register to coprocessor */
+def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs),
+ (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
+ c_imm:$CRm),
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+ imm:$CRm)]>;
+def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs),
+ (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
+ c_imm:$CRm),
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
+ GPR:$Rt2, imm:$CRm)]> {
+ let Predicates = [IsThumb2, PreV8];
+}
+
+/* from coprocessor to ARM core register */
+def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1, (outs GPR:$Rt, GPR:$Rt2),
+ (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm)>;
+
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1, (outs GPR:$Rt, GPR:$Rt2),
+ (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm)> {
+ let Predicates = [IsThumb2, PreV8];
+}
+
+//===----------------------------------------------------------------------===//
+// Other Coprocessor Instructions.
+//
+
+def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+
+ let Predicates = [IsThumb2, PreV8];
+}
+
+def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ "cdp2", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+
+ let Predicates = [IsThumb2, PreV8];
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// ARMv8.1 Privilege Access Never extension
+//
+// SETPAN #imm1
+
+def t2SETPAN : T1I<(outs), (ins imm0_1:$imm), NoItinerary, "setpan\t$imm", []>,
+ T1Misc<0b0110000>, Requires<[IsThumb2, HasV8, HasV8_1a]> {
+ bits<1> imm;
+
+ let Inst{4} = 0b1;
+ let Inst{3} = imm;
+ let Inst{2-0} = 0b000;
+
+ let Unpredictable{4} = 0b1;
+ let Unpredictable{2-0} = 0b111;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// SXT/UXT with no rotate
+let AddedComplexity = 16 in {
+def : T2Pat<(and rGPR:$Rm, 0x000000FF), (t2UXTB rGPR:$Rm, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(and rGPR:$Rm, 0x0000FFFF), (t2UXTH rGPR:$Rm, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(and rGPR:$Rm, 0x00FF00FF), (t2UXTB16 rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0x00FF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+}
+
+def : T2Pat<(sext_inreg rGPR:$Src, i8), (t2SXTB rGPR:$Src, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(sext_inreg rGPR:$Src, i16), (t2SXTH rGPR:$Src, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// Atomic load/store patterns
+def : T2Pat<(atomic_load_8 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_8 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_8 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_imm12:$addr),
+ (t2LDRHi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_negimm8:$addr),
+ (t2LDRHi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_so_reg:$addr),
+ (t2LDRHs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_imm12:$addr),
+ (t2LDRi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_negimm8:$addr),
+ (t2LDRi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_so_reg:$addr),
+ (t2LDRs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRBi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRBi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRBs GPR:$val, t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRHi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRHi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRHs GPR:$val, t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
+
+let AddedComplexity = 8 in {
+ def : T2Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+// Aliases for ADC without the ".w" optional width specifier.
+def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $Rm",
+ (t2ADCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2ADCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for SBC without the ".w" optional width specifier.
+def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $Rm",
+ (t2SBCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2SBCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for ADD without the ".w" optional width specifier.
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+ (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
+ (t2ADDrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2ADDrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+// ... and with the destination and source register combined.
+def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+ (t2ADDri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rdn, $imm",
+ (t2ADDri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $Rm",
+ (t2ADDrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm",
+ (t2ADDrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// add w/ negative immediates is just a sub.
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+
+def : t2InstAlias<"add${s}${p}.w $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"addw${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p}.w $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"addw${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+
+
+// Aliases for SUB without the ".w" optional width specifier.
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm",
+ (t2SUBrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2SUBrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+// ... and with the destination and source register combined.
+def : t2InstAlias<"sub${s}${p} $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm",
+ (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rdn, $Rm",
+ (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm",
+ (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Alias for compares without the ".w" optional width specifier.
+def : t2InstAlias<"cmn${p} $Rn, $Rm",
+ (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"teq${p} $Rn, $Rm",
+ (t2TEQrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"tst${p} $Rn, $Rm",
+ (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+
+// Memory barriers
+def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p)>, Requires<[HasDB]>;
+def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p)>, Requires<[HasDB]>;
+def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p)>, Requires<[HasDB]>;
+
+// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
+// width specifier.
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRpci GPRnopc:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+
+// Alias for MVN with(out) the ".w" optional width specifier.
+def : t2InstAlias<"mvn${s}${p}.w $Rd, $imm",
+ (t2MVNi rGPR:$Rd, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"mvn${s}${p} $Rd, $Rm",
+ (t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm",
+ (t2MVNs rGPR:$Rd, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>;
+
+// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the
+// shift amount is zero (i.e., unspecified).
+def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
+ (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
+ (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// PUSH/POP aliases for STM/LDM
+def : t2InstAlias<"push${p}.w $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+
+// STMIA/STMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stm${p} $Rn!, $regs",
+ (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"ldm${p} $Rn, $regs",
+ (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldm${p} $Rn!, $regs",
+ (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// STMDB/STMDB_UPD aliases w/ the optional .w suffix
+def : t2InstAlias<"stmdb${p}.w $Rn, $regs",
+ (t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stmdb${p}.w $Rn!, $regs",
+ (t2STMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMDB/LDMDB_UPD aliases w/ the optional .w suffix
+def : t2InstAlias<"ldmdb${p}.w $Rn, $regs",
+ (t2LDMDB GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldmdb${p}.w $Rn!, $regs",
+ (t2LDMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// Alias for REV/REV16/REVSH without the ".w" optional width specifier.
+def : t2InstAlias<"rev${p} $Rd, $Rm", (t2REV rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"rev16${p} $Rd, $Rm", (t2REV16 rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"revsh${p} $Rd, $Rm", (t2REVSH rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+
+
+// Alias for RSB without the ".w" optional width specifier, and with optional
+// implied destination register.
+def : t2InstAlias<"rsb${s}${p} $Rd, $Rn, $imm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $imm",
+ (t2RSBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $Rm",
+ (t2RSBrr rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $ShiftedRm",
+ (t2RSBrs rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$ShiftedRm, pred:$p,
+ cc_out:$s)>;
+
+// SSAT/USAT optional shift operand.
+def : t2InstAlias<"ssat${p} $Rd, $sat_imm, $Rn",
+ (t2SSAT rGPR:$Rd, imm1_32:$sat_imm, rGPR:$Rn, 0, pred:$p)>;
+def : t2InstAlias<"usat${p} $Rd, $sat_imm, $Rn",
+ (t2USAT rGPR:$Rd, imm0_31:$sat_imm, rGPR:$Rn, 0, pred:$p)>;
+
+// STM w/o the .w suffix.
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// Alias for STR, STRB, and STRH without the ".w" optional
+// width specifier.
+def : t2InstAlias<"str${p} $Rt, $addr",
+ (t2STRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"strb${p} $Rt, $addr",
+ (t2STRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"strh${p} $Rt, $addr",
+ (t2STRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"str${p} $Rt, $addr",
+ (t2STRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"strb${p} $Rt, $addr",
+ (t2STRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"strh${p} $Rt, $addr",
+ (t2STRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+
+// Extend instruction optional rotate operand.
+def : InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
+ (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
+ (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
+ (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"sxtb16${p} $Rd, $Rm",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+def : t2InstAlias<"sxtb${p} $Rd, $Rm",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxth${p} $Rd, $Rm",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtb${p}.w $Rd, $Rm",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxth${p}.w $Rd, $Rm",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+def : InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
+ (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
+ (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
+ (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"uxtb16${p} $Rd, $Rm",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+def : t2InstAlias<"uxtb${p} $Rd, $Rm",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxth${p} $Rd, $Rm",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtb${p}.w $Rd, $Rm",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxth${p}.w $Rd, $Rm",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+// Extend instruction w/o the ".w" optional width specifier.
+def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : InstAlias<"uxtb16${p} $Rd, $Rm$rot",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : t2InstAlias<"uxth${p} $Rd, $Rm$rot",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+
+def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : InstAlias<"sxtb16${p} $Rd, $Rm$rot",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+
+
+// "mov Rd, t2_so_imm_not" can be handled via "mvn" in assembly, just like
+// for isel.
+def : t2InstAlias<"mov${p} $Rd, $imm",
+ (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+def : t2InstAlias<"mvn${p} $Rd, $imm",
+ (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+// Same for AND <--> BIC
+def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (t2ANDri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
+ (t2ANDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (t2BICri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rdn, $imm",
+ (t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+// Likewise, "add Rd, t2_so_imm_neg" -> sub
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via t2_so_imm_neg
+def : t2InstAlias<"cmp${p} $Rd, $imm",
+ (t2CMNri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rd, $imm",
+ (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+
+
+// Wide 'mul' encoding can be specified with only two operands.
+def : t2InstAlias<"mul${p} $Rn, $Rm",
+ (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : t2InstAlias<"neg${s}${p} $Rd, $Rm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for
+// these, unfortunately.
+def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+
+def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+
+// ADR w/o the .w suffix
+def : t2InstAlias<"adr${p} $Rd, $addr",
+ (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>;
+
+// LDR(literal) w/ alternate [pc, #imm] syntax.
+def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr",
+ (ins GPR:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRSBpcrel : t2AsmPseudo<"ldrsb${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+ // Version w/ the .w suffix.
+def : t2InstAlias<"ldr${p}.w $Rt, $addr",
+ (t2LDRpcrel GPR:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p), 0>;
+def : t2InstAlias<"ldrb${p}.w $Rt, $addr",
+ (t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p}.w $Rt, $addr",
+ (t2LDRHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p}.w $Rt, $addr",
+ (t2LDRSBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p}.w $Rt, $addr",
+ (t2LDRSHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"add${p} $Rd, pc, $imm",
+ (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>;
+
+// PLD/PLDW/PLI with alternate literal form.
+def : t2InstAlias<"pld${p} $addr",
+ (t2PLDpci t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : InstAlias<"pli${p} $addr",
+ (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p)>,
+ Requires<[IsThumb2,HasV7]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
new file mode 100644
index 0000000..050cd1a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -0,0 +1,1894 @@
+//===-- ARMInstrVFP.td - VFP support for ARM ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM VFP instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
+def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// 8-bit floating-point immediate encodings.
+def FPImmOperand : AsmOperandClass {
+ let Name = "FPImm";
+ let ParserMethod = "parseFPImm";
+}
+
+def vfp_f32imm : Operand<f32>,
+ PatLeaf<(f32 fpimm), [{
+ return ARM_AM::getFP32Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP32Imm(InVal);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+ }]>> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
+
+def vfp_f64imm : Operand<f64>,
+ PatLeaf<(f64 fpimm), [{
+ return ARM_AM::getFP64Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP64Imm(InVal);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+ }]>> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
+
+def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def alignedstore32 : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+// The VCVT to/from fixed-point instructions encode the 'fbits' operand
+// (the number of fixed bits) differently than it appears in the assembly
+// source. It's encoded as "Size - fbits" where Size is the size of the
+// fixed-point representation (32 or 16) and fbits is the value appearing
+// in the assembly source, an integer in [0,16] or (0,32], depending on size.
+def fbits32_asm_operand : AsmOperandClass { let Name = "FBits32"; }
+def fbits32 : Operand<i32> {
+ let PrintMethod = "printFBits32";
+ let ParserMatchClass = fbits32_asm_operand;
+}
+
+def fbits16_asm_operand : AsmOperandClass { let Name = "FBits16"; }
+def fbits16 : Operand<i32> {
+ let PrintMethod = "printFBits16";
+ let ParserMatchClass = fbits16_asm_operand;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+
+def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
+ IIC_fpLoad64, "vldr", "\t$Dd, $addr",
+ [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
+
+def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
+ IIC_fpLoad32, "vldr", "\t$Sd, $addr",
+ [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
+
+def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
+ IIC_fpStore64, "vstr", "\t$Dd, $addr",
+ [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
+
+def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
+ IIC_fpStore32, "vstr", "\t$Sd, $addr",
+ [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass vfp_ldst_mult<string asm, bit L_bit,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ // Double Precision
+ def DIA :
+ AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DIA_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DDB_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+
+ // Single Precision
+ def SIA :
+ AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SIA_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SDB_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+}
+
+let hasSideEffects = 0 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>;
+
+} // hasSideEffects
+
+def : MnemonicAlias<"vldm", "vldmia">;
+def : MnemonicAlias<"vstm", "vstmia">;
+
+// FLDM/FSTM - Load / Store multiple single / double precision registers for
+// pre-ARMv6 cores.
+// These instructions are deprecated!
+def : VFP2MnemonicAlias<"fldmias", "vldmia">;
+def : VFP2MnemonicAlias<"fldmdbs", "vldmdb">;
+def : VFP2MnemonicAlias<"fldmeas", "vldmdb">;
+def : VFP2MnemonicAlias<"fldmfds", "vldmia">;
+def : VFP2MnemonicAlias<"fldmiad", "vldmia">;
+def : VFP2MnemonicAlias<"fldmdbd", "vldmdb">;
+def : VFP2MnemonicAlias<"fldmead", "vldmdb">;
+def : VFP2MnemonicAlias<"fldmfdd", "vldmia">;
+
+def : VFP2MnemonicAlias<"fstmias", "vstmia">;
+def : VFP2MnemonicAlias<"fstmdbs", "vstmdb">;
+def : VFP2MnemonicAlias<"fstmeas", "vstmia">;
+def : VFP2MnemonicAlias<"fstmfds", "vstmdb">;
+def : VFP2MnemonicAlias<"fstmiad", "vstmia">;
+def : VFP2MnemonicAlias<"fstmdbd", "vstmdb">;
+def : VFP2MnemonicAlias<"fstmead", "vstmia">;
+def : VFP2MnemonicAlias<"fstmfdd", "vstmdb">;
+
+def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>;
+
+// FLDMX, FSTMX - Load and store multiple unknown precision registers for
+// pre-armv6 cores.
+// These instruction are deprecated so we don't want them to get selected.
+multiclass vfp_ldstx_mult<string asm, bit L_bit> {
+ // Unknown precision
+ def XIA :
+ AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeNone, !strconcat(asm, "iax${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def XIA_UPD :
+ AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeUpd, !strconcat(asm, "iax${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def XDB_UPD :
+ AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeUpd, !strconcat(asm, "dbx${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+}
+
+defm FLDM : vfp_ldstx_mult<"fldm", 1>;
+defm FSTM : vfp_ldstx_mult<"fstm", 0>;
+
+def : VFP2MnemonicAlias<"fldmeax", "fldmdbx">;
+def : VFP2MnemonicAlias<"fldmfdx", "fldmiax">;
+
+def : VFP2MnemonicAlias<"fstmeax", "fstmiax">;
+def : VFP2MnemonicAlias<"fstmfdx", "fstmdbx">;
+
+//===----------------------------------------------------------------------===//
+// FP Binary Operations.
+//
+
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
+def VADDD : ADbI<0b11100, 0b11, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
+def VADDS : ASbIn<0b11100, 0b11, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
+def VSUBD : ADbI<0b11100, 0b11, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
+def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
+def VDIVD : ADbI<0b11101, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
+def VDIVS : ASbI<0b11101, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
+def VMULD : ADbI<0b11100, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
+def VMULS : ASbIn<0b11100, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VNMULD : ADbI<0b11100, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+
+def VNMULS : ASbI<0b11100, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+multiclass vsel_inst<string op, bits<2> opc, int CC> {
+ let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
+ Uses = [CPSR], AddedComplexity = 4 in {
+ def S : ASbInp<0b11100, opc, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
+ [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>,
+ Requires<[HasFPARMv8]>;
+
+ def D : ADbInp<0b11100, opc, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
+ [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
+ }
+}
+
+// The CC constants here match ARMCC::CondCodes.
+defm VSELGT : vsel_inst<"gt", 0b11, 12>;
+defm VSELGE : vsel_inst<"ge", 0b10, 10>;
+defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
+defm VSELVS : vsel_inst<"vs", 0b01, 6>;
+
+multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
+ let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
+ def S : ASbInp<0b11101, 0b00, opc,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"),
+ [(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>,
+ Requires<[HasFPARMv8]>;
+
+ def D : ADbInp<0b11101, 0b00, opc,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"),
+ [(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
+ }
+}
+
+defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
+defm VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
+
+// Match reassociated forms only if not sign dependent rounding.
+def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
+ (VNMULD DPR:$a, DPR:$b)>,
+ Requires<[NoHonorSignDependentRounding,HasDPVFP]>;
+def : Pat<(fmul (fneg SPR:$a), SPR:$b),
+ (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+
+// These are encoded as unary instructions.
+let Defs = [FPSCR_NZCV] in {
+def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+
+def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+} // Defs = [FPSCR_NZCV]
+
+//===----------------------------------------------------------------------===//
+// FP Unary Operations.
+//
+
+def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
+
+def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fabs SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+let Defs = [FPSCR_NZCV] in {
+def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
+ [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
+ [(arm_cmpfp0 SPR:$Sd)]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+} // Defs = [FPSCR_NZCV]
+
+def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
+ [(set DPR:$Dd, (fextend SPR:$Sm))]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Predicates = [HasVFP2, HasDPVFP];
+}
+
+// Special case encoding: bits 11-8 is 0b1011.
+def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (fround DPR:$Dm))]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = 0b11101;
+ let Inst{21-16} = 0b110111;
+ let Inst{11-8} = 0b1011;
+ let Inst{7-6} = 0b11;
+ let Inst{4} = 0;
+
+ let Predicates = [HasVFP2, HasDPVFP];
+}
+
+// Between half, single and double-precision. For disassembly only.
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
+
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
+
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
+
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
+
+def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ // Instruction operands.
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+}
+
+def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm",
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm",
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ // Instruction operands.
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+}
+
+def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm",
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+}
+
+def : Pat<(fp_to_f16 SPR:$a),
+ (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
+def : Pat<(fp_to_f16 (f64 DPR:$a)),
+ (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>;
+
+def : Pat<(f16_to_fp GPR:$a),
+ (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : Pat<(f64 (f16_to_fp GPR:$a)),
+ (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+multiclass vcvt_inst<string opc, bits<2> rm,
+ SDPatternOperator node = null_frag> {
+ let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+ def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
+ []>,
+ Requires<[HasFPARMv8]> {
+ let Inst{17-16} = rm;
+ }
+
+ def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"),
+ []>,
+ Requires<[HasFPARMv8]> {
+ let Inst{17-16} = rm;
+ }
+
+ def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"),
+ []>,
+ Requires<[HasFPARMv8, HasDPVFP]> {
+ bits<5> Dm;
+
+ let Inst{17-16} = rm;
+
+ // Encode instruction operands
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{8} = 1;
+ }
+
+ def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"),
+ []>,
+ Requires<[HasFPARMv8, HasDPVFP]> {
+ bits<5> Dm;
+
+ let Inst{17-16} = rm;
+
+ // Encode instruction operands
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{8} = 1;
+ }
+ }
+
+ let Predicates = [HasFPARMv8] in {
+ def : Pat<(i32 (fp_to_sint (node SPR:$a))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"SS") SPR:$a),
+ GPR)>;
+ def : Pat<(i32 (fp_to_uint (node SPR:$a))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"US") SPR:$a),
+ GPR)>;
+ }
+ let Predicates = [HasFPARMv8, HasDPVFP] in {
+ def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"SD") DPR:$a),
+ GPR)>;
+ def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"UD") DPR:$a),
+ GPR)>;
+ }
+}
+
+defm VCVTA : vcvt_inst<"a", 0b00, frnd>;
+defm VCVTN : vcvt_inst<"n", 0b01>;
+defm VCVTP : vcvt_inst<"p", 0b10, fceil>;
+defm VCVTM : vcvt_inst<"m", 0b11, ffloor>;
+
+def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
+
+def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fneg SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
+ def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm",
+ [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
+ Requires<[HasFPARMv8]> {
+ let Inst{7} = op2;
+ let Inst{16} = op;
+ }
+ def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm",
+ [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
+ Requires<[HasFPARMv8, HasDPVFP]> {
+ let Inst{7} = op2;
+ let Inst{16} = op;
+ }
+
+ def : InstAlias<!strconcat("vrint", opc, "$p.f32.f32\t$Sd, $Sm"),
+ (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p)>,
+ Requires<[HasFPARMv8]>;
+ def : InstAlias<!strconcat("vrint", opc, "$p.f64.f64\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p)>,
+ Requires<[HasFPARMv8,HasDPVFP]>;
+}
+
+defm VRINTZ : vrint_inst_zrx<"z", 0, 1, ftrunc>;
+defm VRINTR : vrint_inst_zrx<"r", 0, 0, fnearbyint>;
+defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>;
+
+multiclass vrint_inst_anpm<string opc, bits<2> rm,
+ SDPatternOperator node = null_frag> {
+ let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+ def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"),
+ [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
+ Requires<[HasFPARMv8]> {
+ let Inst{17-16} = rm;
+ }
+ def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"),
+ [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
+ Requires<[HasFPARMv8, HasDPVFP]> {
+ let Inst{17-16} = rm;
+ }
+ }
+
+ def : InstAlias<!strconcat("vrint", opc, ".f32.f32\t$Sd, $Sm"),
+ (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm)>,
+ Requires<[HasFPARMv8]>;
+ def : InstAlias<!strconcat("vrint", opc, ".f64.f64\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>,
+ Requires<[HasFPARMv8,HasDPVFP]>;
+}
+
+defm VRINTA : vrint_inst_anpm<"a", 0b00, frnd>;
+defm VRINTN : vrint_inst_anpm<"n", 0b01>;
+defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>;
+defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
+
+def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
+
+def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
+
+let hasSideEffects = 0 in {
+def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
+
+def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+} // hasSideEffects
+
+//===----------------------------------------------------------------------===//
+// FP <-> GPR Copies. Int <-> FP Conversions.
+//
+
+def VMOVRS : AVConv2I<0b11100001, 0b1010,
+ (outs GPR:$Rt), (ins SPR:$Sn),
+ IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
+ [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+ // Instruction operands.
+ bits<4> Rt;
+ bits<5> Sn;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+// Bitcast i32 -> f32. NEON prefers to use VMOVDRR.
+def VMOVSR : AVConv4I<0b11100000, 0b1010,
+ (outs SPR:$Sn), (ins GPR:$Rt),
+ IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
+ [(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
+ Requires<[HasVFP2, UseVMOVSR]> {
+ // Instruction operands.
+ bits<5> Sn;
+ bits<4> Rt;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+let hasSideEffects = 0 in {
+def VMOVRRD : AVConv3I<0b11000101, 0b1011,
+ (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
+ [/* FIXME: Can't write pattern for multiple result instr*/]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+
+ // This instruction is equivalent to
+ // $Rt = EXTRACT_SUBREG $Dm, ssub_0
+ // $Rt2 = EXTRACT_SUBREG $Dm, ssub_1
+ let isExtractSubreg = 1;
+}
+
+def VMOVRRS : AVConv3I<0b11000101, 0b1010,
+ (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> src1;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = src1{4-1};
+ let Inst{5} = src1{0};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+ let DecoderMethod = "DecodeVMOVRRS";
+}
+} // hasSideEffects
+
+// FMDHR: GPR -> SPR
+// FMDLR: GPR -> SPR
+
+def VMOVDRR : AVConv5I<0b11000100, 0b1011,
+ (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
+ IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
+ [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+
+ // This instruction is equivalent to
+ // $Dm = REG_SEQUENCE $Rt, ssub_0, $Rt2, ssub_1
+ let isRegSequence = 1;
+}
+
+// Hoist an fabs or a fneg of a value coming from integer registers
+// and do the fabs/fneg on the integer value. This is never a lose
+// and could enable the conversion to float to be removed completely.
+def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
+ Requires<[IsARM]>;
+def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
+ Requires<[IsThumb2]>;
+def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>,
+ Requires<[IsARM]>;
+def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (t2EORri GPR:$Rh, (i32 0x80000000)))>,
+ Requires<[IsThumb2]>;
+
+let hasSideEffects = 0 in
+def VMOVSRR : AVConv5I<0b11000100, 0b1010,
+ (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
+ IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
+ [/* For disassembly only; pattern left blank */]> {
+ // Instruction operands.
+ bits<5> dst1;
+ bits<4> src1;
+ bits<4> src2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = dst1{4-1};
+ let Inst{5} = dst1{0};
+ let Inst{15-12} = src1;
+ let Inst{19-16} = src2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+
+ let DecoderMethod = "DecodeVMOVSRR";
+}
+
+// FMRDH: SPR -> GPR
+// FMRDL: SPR -> GPR
+// FMRRS: SPR -> GPR
+// FMRX: SPR system reg -> GPR
+// FMSRR: GPR -> SPR
+// FMXR: GPR -> VFP system reg
+
+
+// Int -> FP:
+
+class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Predicates = [HasVFP2, HasDPVFP];
+}
+
+class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
+ []> {
+ let Inst{7} = 1; // s32
+}
+
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(f64 (sint_to_fp GPR:$a)),
+ (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+ def : VFPPat<(f64 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
+ (VSITOD (VLDRS addrmode5:$a))>;
+}
+
+def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd),(ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
+ []> {
+ let Inst{7} = 1; // s32
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
+ (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
+ (VSITOS (VLDRS addrmode5:$a))>;
+
+def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
+ []> {
+ let Inst{7} = 0; // u32
+}
+
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(f64 (uint_to_fp GPR:$a)),
+ (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+ def : VFPPat<(f64 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
+ (VUITOD (VLDRS addrmode5:$a))>;
+}
+
+def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
+ []> {
+ let Inst{7} = 0; // u32
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
+ (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
+ (VUITOS (VLDRS addrmode5:$a))>;
+
+// FP -> Int:
+
+class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Predicates = [HasVFP2, HasDPVFP];
+}
+
+class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
+ []> {
+ let Inst{7} = 1; // Z bit
+}
+
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
+ (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
+
+ def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
+}
+
+def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
+ []> {
+ let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
+ (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
+
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
+ addrmode5:$ptr),
+ (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
+
+def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
+ []> {
+ let Inst{7} = 1; // Z bit
+}
+
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
+ (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
+
+ def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
+}
+
+def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
+ []> {
+ let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
+ (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
+
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
+ addrmode5:$ptr),
+ (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
+
+// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
+let Uses = [FPSCR] in {
+// FIXME: Verify encoding after integrated assembler is working.
+def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
+ let Inst{7} = 0; // Z bit
+}
+}
+
+// Convert between floating-point and fixed-point
+// Data type for fixed-point naming convention:
+// S16 (U=0, sx=0) -> SH
+// U16 (U=1, sx=0) -> UH
+// S32 (U=0, sx=1) -> SL
+// U32 (U=1, sx=1) -> UL
+
+let Constraints = "$a = $dst" in {
+
+// FP to Fixed-Point:
+
+// Single Precision register
+class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
+ bit op5, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
+ Sched<[WriteCvtFP]> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{0};
+ let Inst{15-12} = dst{4-1};
+}
+
+// Double Precision register
+class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
+ bit op5, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
+ Sched<[WriteCvtFP]> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{4};
+ let Inst{15-12} = dst{3-0};
+
+ let Predicates = [HasVFP2, HasDPVFP];
+}
+
+def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
+
+def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
+
+def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
+
+def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
+
+// Fixed-Point to FP:
+
+def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
+
+def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
+
+def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
+
+def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
+
+} // End of 'let Constraints = "$a = $dst" in'
+
+//===----------------------------------------------------------------------===//
+// FP Multiply-Accumulate Operations.
+//
+
+def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+
+def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
+
+def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+
+def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+
+def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+
+def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+
+def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+
+def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+
+//===----------------------------------------------------------------------===//
+// Fused FP Multiply-Accumulate Operations.
+//
+def VFMAD : ADbI<0b11101, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+
+def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fma x, y, z) -> (vfms z, x, y)
+def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
+ (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4,HasDPVFP]>;
+def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
+ (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFMSD : ADbI<0b11101, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+
+def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fma (fneg x), y, z) -> (vfms z, x, y)
+def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
+ (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4,HasDPVFP]>;
+def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
+ (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fma x, (fneg y), z) -> (vfms z, x, y)
+def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
+ (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4,HasDPVFP]>;
+def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
+ (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+
+def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fneg (fma x, y, z)) -> (vfnma z, x, y)
+def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
+ (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4,HasDPVFP]>;
+def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
+ (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
+def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
+ (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4,HasDPVFP]>;
+def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
+ (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+
+def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+
+// (fma x, y, (fneg z)) -> (vfnms z, x, y))
+def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4,HasDPVFP]>;
+def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
+def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4,HasDPVFP]>;
+def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fneg (fma x, (fneg y), z) -> (vfnms z, x, y)
+def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4,HasDPVFP]>;
+def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+//===----------------------------------------------------------------------===//
+// FP Conditional moves.
+//
+
+let hasSideEffects = 0 in {
+def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
+ IIC_fpUNA64,
+ [(set (f64 DPR:$Dd),
+ (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
+ RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>;
+
+def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
+ IIC_fpUNA32,
+ [(set (f32 SPR:$Sd),
+ (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
+ RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>;
+} // hasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Move from VFP System Register to ARM core register.
+//
+
+class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> Rt;
+
+ let Inst{27-20} = 0b11101111;
+ let Inst{19-16} = opc19_16;
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{6-5} = 0b00;
+ let Inst{4} = 1;
+ let Inst{3-0} = 0b0000;
+}
+
+// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
+// to APSR.
+let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
+def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
+ "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
+
+// Application level FPSCR -> GPR
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpscr",
+ [(set GPR:$Rt, (int_arm_get_fpscr))]>;
+
+// System level FPEXC, FPSID -> GPR
+let Uses = [FPSCR] in {
+ def VMRS_FPEXC : MovFromVFP<0b1000 /* fpexc */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpexc", []>;
+ def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpsid", []>;
+ def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr0", []>;
+ def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr1", []>;
+ def VMRS_MVFR2 : MovFromVFP<0b0101 /* mvfr2 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr2", []>, Requires<[HasFPARMv8]>;
+ def VMRS_FPINST : MovFromVFP<0b1001 /* fpinst */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpinst", []>;
+ def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpinst2", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move from ARM core register to VFP System Register.
+//
+
+class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> src;
+
+ // Encode instruction operand.
+ let Inst{15-12} = src;
+
+ let Inst{27-20} = 0b11101110;
+ let Inst{19-16} = opc19_16;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{4} = 1;
+}
+
+let Defs = [FPSCR] in {
+ // Application level GPR -> FPSCR
+ def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpscr, $src", [(int_arm_set_fpscr GPR:$src)]>;
+ // System level GPR -> FPEXC
+ def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpexc, $src", []>;
+ // System level GPR -> FPSID
+ def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpsid, $src", []>;
+
+ def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpinst, $src", []>;
+ def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpinst2, $src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
+// Materialize FP immediates. VFP3 only.
+let isReMaterializable = 1 in {
+def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
+ VFPMiscFrm, IIC_fpUNA64,
+ "vmov", ".f64\t$Dd, $imm",
+ [(set DPR:$Dd, vfp_f64imm:$imm)]>,
+ Requires<[HasVFP3,HasDPVFP]> {
+ bits<5> Dd;
+ bits<8> imm;
+
+ let Inst{27-23} = 0b11101;
+ let Inst{22} = Dd{4};
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{7-4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision.
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = imm{3-0};
+}
+
+def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
+ VFPMiscFrm, IIC_fpUNA32,
+ "vmov", ".f32\t$Sd, $imm",
+ [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+ bits<5> Sd;
+ bits<8> imm;
+
+ let Inst{27-23} = 0b11101;
+ let Inst{22} = Sd{0};
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{7-4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision.
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = imm{3-0};
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases.
+//
+// A few mnemonic aliases for pre-unifixed syntax. We don't guarantee to
+// support them all, but supporting at least some of the basics is
+// good to be friendly.
+def : VFP2MnemonicAlias<"flds", "vldr">;
+def : VFP2MnemonicAlias<"fldd", "vldr">;
+def : VFP2MnemonicAlias<"fmrs", "vmov">;
+def : VFP2MnemonicAlias<"fmsr", "vmov">;
+def : VFP2MnemonicAlias<"fsqrts", "vsqrt">;
+def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">;
+def : VFP2MnemonicAlias<"fadds", "vadd.f32">;
+def : VFP2MnemonicAlias<"faddd", "vadd.f64">;
+def : VFP2MnemonicAlias<"fmrdd", "vmov">;
+def : VFP2MnemonicAlias<"fmrds", "vmov">;
+def : VFP2MnemonicAlias<"fmrrd", "vmov">;
+def : VFP2MnemonicAlias<"fmdrr", "vmov">;
+def : VFP2MnemonicAlias<"fmuls", "vmul.f32">;
+def : VFP2MnemonicAlias<"fmuld", "vmul.f64">;
+def : VFP2MnemonicAlias<"fnegs", "vneg.f32">;
+def : VFP2MnemonicAlias<"fnegd", "vneg.f64">;
+def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">;
+def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">;
+def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">;
+def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">;
+def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">;
+def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">;
+def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">;
+def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">;
+def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">;
+def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">;
+def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">;
+def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">;
+def : VFP2MnemonicAlias<"fsts", "vstr">;
+def : VFP2MnemonicAlias<"fstd", "vstr">;
+def : VFP2MnemonicAlias<"fmacd", "vmla.f64">;
+def : VFP2MnemonicAlias<"fmacs", "vmla.f32">;
+def : VFP2MnemonicAlias<"fcpys", "vmov.f32">;
+def : VFP2MnemonicAlias<"fcpyd", "vmov.f64">;
+def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">;
+def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">;
+def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">;
+def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">;
+def : VFP2MnemonicAlias<"fmrx", "vmrs">;
+def : VFP2MnemonicAlias<"fmxr", "vmsr">;
+
+// Be friendly and accept the old form of zero-compare
+def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
+def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
+
+
+def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
+ (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm",
+ (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
+ (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2DPInstAlias<"fsubd${p} $Dd, $Dn, $Dm",
+ (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+
+// No need for the size suffix on VSQRT. It's implied by the register classes.
+def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
+def : VFP2DPInstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+// VLDR/VSTR accept an optional type suffix.
+def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
+ (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr",
+ (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
+ (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
+ (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+
+// VMOV can accept optional 32-bit or less data type suffix suffix.
+def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.16 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.32 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.8 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.16 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.32 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+
+def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn",
+ (VMOVRRD GPR:$Rt, GPR:$Rt2, DPR:$Dn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2",
+ (VMOVDRR DPR:$Dn, GPR:$Rt, GPR:$Rt2, pred:$p)>;
+
+// VMOVS doesn't need the .f32 to disambiguate from the NEON encoding the way
+// VMOVD does.
+def : VFP2InstAlias<"vmov${p} $Sd, $Sm",
+ (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>;
+
+// FCONSTD/FCONSTS alias for vmov.f64/vmov.f32
+// These aliases provide added functionality over vmov.f instructions by
+// allowing users to write assembly containing encoded floating point constants
+// (e.g. #0x70 vs #1.0). Without these alises there is no way for the
+// assembler to accept encoded fp constants (but the equivalent fp-literal is
+// accepted directly by vmovf).
+def : VFP3InstAlias<"fconstd${p} $Dd, $val",
+ (FCONSTD DPR:$Dd, vfp_f64imm:$val, pred:$p)>;
+def : VFP3InstAlias<"fconsts${p} $Sd, $val",
+ (FCONSTS SPR:$Sd, vfp_f32imm:$val, pred:$p)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
new file mode 100644
index 0000000..6e7e47b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -0,0 +1,2338 @@
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains a pass that performs load / store related peephole
+/// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMISelLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "ThumbRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-ldst-opt"
+
+STATISTIC(NumLDMGened , "Number of ldm instructions generated");
+STATISTIC(NumSTMGened , "Number of stm instructions generated");
+STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
+STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
+STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
+STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
+STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
+STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
+STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
+STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
+
+namespace llvm {
+void initializeARMLoadStoreOptPass(PassRegistry &);
+}
+
+#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
+
+namespace {
+ /// Post- register allocation pass the combine load / store instructions to
+ /// form ldm / stm instructions.
+ struct ARMLoadStoreOpt : public MachineFunctionPass {
+ static char ID;
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {
+ initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ const MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ const TargetLowering *TL;
+ ARMFunctionInfo *AFI;
+ LivePhysRegs LiveRegs;
+ RegisterClassInfo RegClassInfo;
+ MachineBasicBlock::const_iterator LiveRegPos;
+ bool LiveRegsValid;
+ bool RegClassInfoValid;
+ bool isThumb1, isThumb2;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ const char *getPassName() const override {
+ return ARM_LOAD_STORE_OPT_NAME;
+ }
+
+ private:
+ /// A set of load/store MachineInstrs with same base register sorted by
+ /// offset.
+ struct MemOpQueueEntry {
+ MachineInstr *MI;
+ int Offset; ///< Load/Store offset.
+ unsigned Position; ///< Position as counted from end of basic block.
+ MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
+ : MI(MI), Offset(Offset), Position(Position) {}
+ };
+ typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
+
+ /// A set of MachineInstrs that fulfill (nearly all) conditions to get
+ /// merged into a LDM/STM.
+ struct MergeCandidate {
+ /// List of instructions ordered by load/store offset.
+ SmallVector<MachineInstr*, 4> Instrs;
+ /// Index in Instrs of the instruction being latest in the schedule.
+ unsigned LatestMIIdx;
+ /// Index in Instrs of the instruction being earliest in the schedule.
+ unsigned EarliestMIIdx;
+ /// Index into the basic block where the merged instruction will be
+ /// inserted. (See MemOpQueueEntry.Position)
+ unsigned InsertPos;
+ /// Whether the instructions can be merged into a ldm/stm instruction.
+ bool CanMergeToLSMulti;
+ /// Whether the instructions can be merged into a ldrd/strd instruction.
+ bool CanMergeToLSDouble;
+ };
+ SpecificBumpPtrAllocator<MergeCandidate> Allocator;
+ SmallVector<const MergeCandidate*,4> Candidates;
+ SmallVector<MachineInstr*,4> MergeBaseCandidates;
+
+ void moveLiveRegsBefore(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator Before);
+ unsigned findFreeReg(const TargetRegisterClass &RegClass);
+ void UpdateBaseRegUses(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, unsigned Base, unsigned WordOffset,
+ ARMCC::CondCodes Pred, unsigned PredReg);
+ MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
+ MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
+ void FormCandidates(const MemOpQueue &MemOps);
+ MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
+ bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ bool MergeBaseUpdateLoadStore(MachineInstr *MI);
+ bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
+ bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
+ bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
+ bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ bool CombineMovBx(MachineBasicBlock &MBB);
+ };
+ char ARMLoadStoreOpt::ID = 0;
+}
+
+INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
+
+static bool definesCPSR(const MachineInstr *MI) {
+ for (const auto &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If the instruction has live CPSR def, then it's not safe to fold it
+ // into load / store.
+ return true;
+ }
+
+ return false;
+}
+
+static int getMemoryOpOffset(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+
+ if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
+ Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
+ return OffField;
+
+ // Thumb1 immediate offsets are scaled by 4
+ if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
+ Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
+ return OffField * 4;
+
+ int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
+ : ARM_AM::getAM5Offset(OffField) * 4;
+ ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
+ : ARM_AM::getAM5Op(OffField);
+
+ if (Op == ARM_AM::sub)
+ return -Offset;
+
+ return Offset;
+}
+
+static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
+ return MI.getOperand(1);
+}
+
+static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
+ return MI.getOperand(0);
+}
+
+static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDRi12:
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA;
+ case ARM_AM::da: return ARM::LDMDA;
+ case ARM_AM::db: return ARM::LDMDB;
+ case ARM_AM::ib: return ARM::LDMIB;
+ }
+ case ARM::STRi12:
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA;
+ case ARM_AM::da: return ARM::STMDA;
+ case ARM_AM::db: return ARM::STMDB;
+ case ARM_AM::ib: return ARM::STMIB;
+ }
+ case ARM::tLDRi:
+ case ARM::tLDRspi:
+ // tLDMIA is writeback-only - unless the base register is in the input
+ // reglist.
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::tLDMIA;
+ }
+ case ARM::tSTRi:
+ case ARM::tSTRspi:
+ // There is no non-writeback tSTMIA either.
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::tSTMIA_UPD;
+ }
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA;
+ case ARM_AM::db: return ARM::t2LDMDB;
+ }
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA;
+ case ARM_AM::db: return ARM::t2STMDB;
+ }
+ case ARM::VLDRS:
+ ++NumVLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA;
+ case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
+ }
+ case ARM::VSTRS:
+ ++NumVSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA;
+ case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
+ }
+ case ARM::VLDRD:
+ ++NumVLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA;
+ case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
+ }
+ case ARM::VSTRD:
+ ++NumVSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA;
+ case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
+ }
+ }
+}
+
+static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMIA_UPD:
+ case ARM::STMIA:
+ case ARM::STMIA_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMIA_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ return ARM_AM::ia;
+
+ case ARM::LDMDA:
+ case ARM::LDMDA_UPD:
+ case ARM::STMDA:
+ case ARM::STMDA_UPD:
+ return ARM_AM::da;
+
+ case ARM::LDMDB:
+ case ARM::LDMDB_UPD:
+ case ARM::STMDB:
+ case ARM::STMDB_UPD:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMDB:
+ case ARM::t2STMDB_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ return ARM_AM::db;
+
+ case ARM::LDMIB:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIB:
+ case ARM::STMIB_UPD:
+ return ARM_AM::ib;
+ }
+}
+
+static bool isT1i32Load(unsigned Opc) {
+ return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
+}
+
+static bool isT2i32Load(unsigned Opc) {
+ return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
+}
+
+static bool isi32Load(unsigned Opc) {
+ return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
+}
+
+static bool isT1i32Store(unsigned Opc) {
+ return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
+}
+
+static bool isT2i32Store(unsigned Opc) {
+ return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
+}
+
+static bool isi32Store(unsigned Opc) {
+ return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
+}
+
+static bool isLoadSingle(unsigned Opc) {
+ return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
+}
+
+static unsigned getImmScale(unsigned Opc) {
+ switch (Opc) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::tLDRi:
+ case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
+ return 1;
+ case ARM::tLDRHi:
+ case ARM::tSTRHi:
+ return 2;
+ case ARM::tLDRBi:
+ case ARM::tSTRBi:
+ return 4;
+ }
+}
+
+static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return 4;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return 8;
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::VLDMSIA:
+ case ARM::VSTMSIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
+ case ARM::VLDMDIA:
+ case ARM::VSTMDIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
+ }
+}
+
+/// Update future uses of the base register with the offset introduced
+/// due to writeback. This function only works on Thumb1.
+void
+ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, unsigned Base,
+ unsigned WordOffset,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
+ assert(isThumb1 && "Can only update base register uses for Thumb1!");
+ // Start updating any instructions with immediate offsets. Insert a SUB before
+ // the first non-updateable instruction (if any).
+ for (; MBBI != MBB.end(); ++MBBI) {
+ bool InsertSub = false;
+ unsigned Opc = MBBI->getOpcode();
+
+ if (MBBI->readsRegister(Base)) {
+ int Offset;
+ bool IsLoad =
+ Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
+ bool IsStore =
+ Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
+
+ if (IsLoad || IsStore) {
+ // Loads and stores with immediate offsets can be updated, but only if
+ // the new offset isn't negative.
+ // The MachineOperand containing the offset immediate is the last one
+ // before predicates.
+ MachineOperand &MO =
+ MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
+ // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
+ Offset = MO.getImm() - WordOffset * getImmScale(Opc);
+
+ // If storing the base register, it needs to be reset first.
+ unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
+
+ if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
+ MO.setImm(Offset);
+ else
+ InsertSub = true;
+
+ } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
+ !definesCPSR(MBBI)) {
+ // SUBS/ADDS using this register, with a dead def of the CPSR.
+ // Merge it with the update; if the merged offset is too large,
+ // insert a new sub instead.
+ MachineOperand &MO =
+ MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
+ Offset = (Opc == ARM::tSUBi8) ?
+ MO.getImm() + WordOffset * 4 :
+ MO.getImm() - WordOffset * 4 ;
+ if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
+ // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
+ // Offset == 0.
+ MO.setImm(Offset);
+ // The base register has now been reset, so exit early.
+ return;
+ } else {
+ InsertSub = true;
+ }
+
+ } else {
+ // Can't update the instruction.
+ InsertSub = true;
+ }
+
+ } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
+ // Since SUBS sets the condition flags, we can't place the base reset
+ // after an instruction that has a live CPSR def.
+ // The base register might also contain an argument for a function call.
+ InsertSub = true;
+ }
+
+ if (InsertSub) {
+ // An instruction above couldn't be updated, so insert a sub.
+ AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
+ .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
+ return;
+ }
+
+ if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
+ // Register got killed. Stop updating.
+ return;
+ }
+
+ // End of block was reached.
+ if (MBB.succ_size() > 0) {
+ // FIXME: Because of a bug, live registers are sometimes missing from
+ // the successor blocks' live-in sets. This means we can't trust that
+ // information and *always* have to reset at the end of a block.
+ // See PR21029.
+ if (MBBI != MBB.end()) --MBBI;
+ AddDefaultT1CC(
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
+ .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
+ }
+}
+
+/// Return the first register of class \p RegClass that is not in \p Regs.
+unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
+ if (!RegClassInfoValid) {
+ RegClassInfo.runOnMachineFunction(*MF);
+ RegClassInfoValid = true;
+ }
+
+ for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
+ if (!LiveRegs.contains(Reg))
+ return Reg;
+ return 0;
+}
+
+/// Compute live registers just before instruction \p Before (in normal schedule
+/// direction). Computes backwards so multiple queries in the same block must
+/// come in reverse order.
+void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator Before) {
+ // Initialize if we never queried in this block.
+ if (!LiveRegsValid) {
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveOuts(&MBB, true);
+ LiveRegPos = MBB.end();
+ LiveRegsValid = true;
+ }
+ // Move backward just before the "Before" position.
+ while (LiveRegPos != Before) {
+ --LiveRegPos;
+ LiveRegs.stepBackward(*LiveRegPos);
+ }
+}
+
+static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
+ unsigned Reg) {
+ for (const std::pair<unsigned, bool> &R : Regs)
+ if (R.first == Reg)
+ return true;
+ return false;
+}
+
+/// Create and insert a LDM or STM with Base as base register and registers in
+/// Regs as the register operands that would be loaded / stored. It returns
+/// true if the transformation is done.
+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
+ unsigned NumRegs = Regs.size();
+ assert(NumRegs > 1);
+
+ // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
+ // Compute liveness information for that register to make the decision.
+ bool SafeToClobberCPSR = !isThumb1 ||
+ (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
+ MachineBasicBlock::LQR_Dead);
+
+ bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
+
+ // Exception: If the base register is in the input reglist, Thumb1 LDM is
+ // non-writeback.
+ // It's also not possible to merge an STR of the base register in Thumb1.
+ if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
+ assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
+ if (Opcode == ARM::tLDRi) {
+ Writeback = false;
+ } else if (Opcode == ARM::tSTRi) {
+ return nullptr;
+ }
+ }
+
+ ARM_AM::AMSubMode Mode = ARM_AM::ia;
+ // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
+
+ if (Offset == 4 && haveIBAndDA) {
+ Mode = ARM_AM::ib;
+ } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
+ Mode = ARM_AM::da;
+ } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
+ // VLDM/VSTM do not support DB mode without also updating the base reg.
+ Mode = ARM_AM::db;
+ } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
+ // Check if this is a supported opcode before inserting instructions to
+ // calculate a new base register.
+ if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
+
+ // If starting offset isn't zero, insert a MI to materialize a new base.
+ // But only do so if it is cost effective, i.e. merging more than two
+ // loads / stores.
+ if (NumRegs <= 2)
+ return nullptr;
+
+ // On Thumb1, it's not worth materializing a new base register without
+ // clobbering the CPSR (i.e. not using ADDS/SUBS).
+ if (!SafeToClobberCPSR)
+ return nullptr;
+
+ unsigned NewBase;
+ if (isi32Load(Opcode)) {
+ // If it is a load, then just use one of the destination registers
+ // as the new base. Will no longer be writeback in Thumb1.
+ NewBase = Regs[NumRegs-1].first;
+ Writeback = false;
+ } else {
+ // Find a free register that we can use as scratch register.
+ moveLiveRegsBefore(MBB, InsertBefore);
+ // The merged instruction does not exist yet but will use several Regs if
+ // it is a Store.
+ if (!isLoadSingle(Opcode))
+ for (const std::pair<unsigned, bool> &R : Regs)
+ LiveRegs.addReg(R.first);
+
+ NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
+ if (NewBase == 0)
+ return nullptr;
+ }
+
+ int BaseOpc =
+ isThumb2 ? ARM::t2ADDri :
+ (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
+ (isThumb1 && Offset < 8) ? ARM::tADDi3 :
+ isThumb1 ? ARM::tADDi8 : ARM::ADDri;
+
+ if (Offset < 0) {
+ Offset = - Offset;
+ BaseOpc =
+ isThumb2 ? ARM::t2SUBri :
+ (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
+ isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
+ }
+
+ if (!TL->isLegalAddImmediate(Offset))
+ // FIXME: Try add with register operand?
+ return nullptr; // Probably not worth it then.
+
+ // We can only append a kill flag to the add/sub input if the value is not
+ // used in the register list of the stm as well.
+ bool KillOldBase = BaseKill &&
+ (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
+
+ if (isThumb1) {
+ // Thumb1: depending on immediate size, use either
+ // ADDS NewBase, Base, #imm3
+ // or
+ // MOV NewBase, Base
+ // ADDS NewBase, #imm8.
+ if (Base != NewBase &&
+ (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
+ // Need to insert a MOV to the new base first.
+ if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
+ !STI->hasV6Ops()) {
+ // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
+ if (Pred != ARMCC::AL)
+ return nullptr;
+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase));
+ } else
+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase))
+ .addImm(Pred).addReg(PredReg);
+
+ // The following ADDS/SUBS becomes an update.
+ Base = NewBase;
+ KillOldBase = true;
+ }
+ if (BaseOpc == ARM::tADDrSPi) {
+ assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
+ .addImm(Pred).addReg(PredReg);
+ } else
+ AddDefaultT1CC(
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+ } else {
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
+ }
+ Base = NewBase;
+ BaseKill = true; // New base is always killed straight away.
+ }
+
+ bool isDef = isLoadSingle(Opcode);
+
+ // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
+ // base register writeback.
+ Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
+ if (!Opcode)
+ return nullptr;
+
+ // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
+ // - There is no writeback (LDM of base register),
+ // - the base register is killed by the merged instruction,
+ // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
+ // to reset the base register.
+ // Otherwise, don't merge.
+ // It's safe to return here since the code to materialize a new base register
+ // above is also conditional on SafeToClobberCPSR.
+ if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
+ return nullptr;
+
+ MachineInstrBuilder MIB;
+
+ if (Writeback) {
+ assert(isThumb1 && "expected Writeback only inThumb1");
+ if (Opcode == ARM::tLDMIA) {
+ assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
+ // Update tLDMIA with writeback if necessary.
+ Opcode = ARM::tLDMIA_UPD;
+ }
+
+ MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
+
+ // Thumb1: we might need to set base writeback when building the MI.
+ MIB.addReg(Base, getDefRegState(true))
+ .addReg(Base, getKillRegState(BaseKill));
+
+ // The base isn't dead after a merged instruction with writeback.
+ // Insert a sub instruction after the newly formed instruction to reset.
+ if (!BaseKill)
+ UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
+
+ } else {
+ // No writeback, simply build the MachineInstr.
+ MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
+ MIB.addReg(Base, getKillRegState(BaseKill));
+ }
+
+ MIB.addImm(Pred).addReg(PredReg);
+
+ for (const std::pair<unsigned, bool> &R : Regs)
+ MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
+
+ return MIB.getInstr();
+}
+
+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
+ bool IsLoad = isi32Load(Opcode);
+ assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
+ unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
+
+ assert(Regs.size() == 2);
+ MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
+ TII->get(LoadStoreOpcode));
+ if (IsLoad) {
+ MIB.addReg(Regs[0].first, RegState::Define)
+ .addReg(Regs[1].first, RegState::Define);
+ } else {
+ MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(Regs[1].first, getKillRegState(Regs[1].second));
+ }
+ MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ return MIB.getInstr();
+}
+
+/// Call MergeOps and update MemOps and merges accordingly on success.
+MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
+ const MachineInstr *First = Cand.Instrs.front();
+ unsigned Opcode = First->getOpcode();
+ bool IsLoad = isLoadSingle(Opcode);
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ SmallVector<unsigned, 4> ImpDefs;
+ DenseSet<unsigned> KilledRegs;
+ DenseSet<unsigned> UsedRegs;
+ // Determine list of registers and list of implicit super-register defs.
+ for (const MachineInstr *MI : Cand.Instrs) {
+ const MachineOperand &MO = getLoadStoreRegOp(*MI);
+ unsigned Reg = MO.getReg();
+ bool IsKill = MO.isKill();
+ if (IsKill)
+ KilledRegs.insert(Reg);
+ Regs.push_back(std::make_pair(Reg, IsKill));
+ UsedRegs.insert(Reg);
+
+ if (IsLoad) {
+ // Collect any implicit defs of super-registers, after merging we can't
+ // be sure anymore that we properly preserved these live ranges and must
+ // removed these implicit operands.
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.isDead())
+ continue;
+ assert(MO.isImplicit());
+ unsigned DefReg = MO.getReg();
+
+ if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
+ continue;
+ // We can ignore cases where the super-reg is read and written.
+ if (MI->readsRegister(DefReg))
+ continue;
+ ImpDefs.push_back(DefReg);
+ }
+ }
+ }
+
+ // Attempt the merge.
+ typedef MachineBasicBlock::iterator iterator;
+ MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
+ iterator InsertBefore = std::next(iterator(LatestMI));
+ MachineBasicBlock &MBB = *LatestMI->getParent();
+ unsigned Offset = getMemoryOpOffset(First);
+ unsigned Base = getLoadStoreBaseOp(*First).getReg();
+ bool BaseKill = LatestMI->killsRegister(Base);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
+ DebugLoc DL = First->getDebugLoc();
+ MachineInstr *Merged = nullptr;
+ if (Cand.CanMergeToLSDouble)
+ Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
+ Opcode, Pred, PredReg, DL, Regs);
+ if (!Merged && Cand.CanMergeToLSMulti)
+ Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
+ Opcode, Pred, PredReg, DL, Regs);
+ if (!Merged)
+ return nullptr;
+
+ // Determine earliest instruction that will get removed. We then keep an
+ // iterator just above it so the following erases don't invalidated it.
+ iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
+ bool EarliestAtBegin = false;
+ if (EarliestI == MBB.begin()) {
+ EarliestAtBegin = true;
+ } else {
+ EarliestI = std::prev(EarliestI);
+ }
+
+ // Remove instructions which have been merged.
+ for (MachineInstr *MI : Cand.Instrs)
+ MBB.erase(MI);
+
+ // Determine range between the earliest removed instruction and the new one.
+ if (EarliestAtBegin)
+ EarliestI = MBB.begin();
+ else
+ EarliestI = std::next(EarliestI);
+ auto FixupRange = make_range(EarliestI, iterator(Merged));
+
+ if (isLoadSingle(Opcode)) {
+ // If the previous loads defined a super-reg, then we have to mark earlier
+ // operands undef; Replicate the super-reg def on the merged instruction.
+ for (MachineInstr &MI : FixupRange) {
+ for (unsigned &ImpDefReg : ImpDefs) {
+ for (MachineOperand &MO : MI.implicit_operands()) {
+ if (!MO.isReg() || MO.getReg() != ImpDefReg)
+ continue;
+ if (MO.readsReg())
+ MO.setIsUndef();
+ else if (MO.isDef())
+ ImpDefReg = 0;
+ }
+ }
+ }
+
+ MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
+ for (unsigned ImpDef : ImpDefs)
+ MIB.addReg(ImpDef, RegState::ImplicitDefine);
+ } else {
+ // Remove kill flags: We are possibly storing the values later now.
+ assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
+ for (MachineInstr &MI : FixupRange) {
+ for (MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.isKill())
+ continue;
+ if (UsedRegs.count(MO.getReg()))
+ MO.setIsKill(false);
+ }
+ }
+ assert(ImpDefs.empty());
+ }
+
+ return Merged;
+}
+
+static bool isValidLSDoubleOffset(int Offset) {
+ unsigned Value = abs(Offset);
+ // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
+ // multiplied by 4.
+ return (Value % 4) == 0 && Value < 1024;
+}
+
+/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
+void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
+ const MachineInstr *FirstMI = MemOps[0].MI;
+ unsigned Opcode = FirstMI->getOpcode();
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ unsigned Size = getLSMultipleTransferSize(FirstMI);
+
+ unsigned SIndex = 0;
+ unsigned EIndex = MemOps.size();
+ do {
+ // Look at the first instruction.
+ const MachineInstr *MI = MemOps[SIndex].MI;
+ int Offset = MemOps[SIndex].Offset;
+ const MachineOperand &PMO = getLoadStoreRegOp(*MI);
+ unsigned PReg = PMO.getReg();
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
+ unsigned Latest = SIndex;
+ unsigned Earliest = SIndex;
+ unsigned Count = 1;
+ bool CanMergeToLSDouble =
+ STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
+ // ARM errata 602117: LDRD with base in list may result in incorrect base
+ // register when interrupted or faulted.
+ if (STI->isCortexM3() && isi32Load(Opcode) &&
+ PReg == getLoadStoreBaseOp(*MI).getReg())
+ CanMergeToLSDouble = false;
+
+ bool CanMergeToLSMulti = true;
+ // On swift vldm/vstm starting with an odd register number as that needs
+ // more uops than single vldrs.
+ if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
+ CanMergeToLSMulti = false;
+
+ // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
+ // deprecated; LDM to PC is fine but cannot happen here.
+ if (PReg == ARM::SP || PReg == ARM::PC)
+ CanMergeToLSMulti = CanMergeToLSDouble = false;
+
+ // Merge following instructions where possible.
+ for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
+ int NewOffset = MemOps[I].Offset;
+ if (NewOffset != Offset + (int)Size)
+ break;
+ const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
+ unsigned Reg = MO.getReg();
+ if (Reg == ARM::SP || Reg == ARM::PC)
+ break;
+
+ // See if the current load/store may be part of a multi load/store.
+ unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
+ bool PartOfLSMulti = CanMergeToLSMulti;
+ if (PartOfLSMulti) {
+ // Register numbers must be in ascending order.
+ if (RegNum <= PRegNum)
+ PartOfLSMulti = false;
+ // For VFP / NEON load/store multiples, the registers must be
+ // consecutive and within the limit on the number of registers per
+ // instruction.
+ else if (!isNotVFP && RegNum != PRegNum+1)
+ PartOfLSMulti = false;
+ }
+ // See if the current load/store may be part of a double load/store.
+ bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
+
+ if (!PartOfLSMulti && !PartOfLSDouble)
+ break;
+ CanMergeToLSMulti &= PartOfLSMulti;
+ CanMergeToLSDouble &= PartOfLSDouble;
+ // Track MemOp with latest and earliest position (Positions are
+ // counted in reverse).
+ unsigned Position = MemOps[I].Position;
+ if (Position < MemOps[Latest].Position)
+ Latest = I;
+ else if (Position > MemOps[Earliest].Position)
+ Earliest = I;
+ // Prepare for next MemOp.
+ Offset += Size;
+ PRegNum = RegNum;
+ }
+
+ // Form a candidate from the Ops collected so far.
+ MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
+ for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
+ Candidate->Instrs.push_back(MemOps[C].MI);
+ Candidate->LatestMIIdx = Latest - SIndex;
+ Candidate->EarliestMIIdx = Earliest - SIndex;
+ Candidate->InsertPos = MemOps[Latest].Position;
+ if (Count == 1)
+ CanMergeToLSMulti = CanMergeToLSDouble = false;
+ Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
+ Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
+ Candidates.push_back(Candidate);
+ // Continue after the chain.
+ SIndex += Count;
+ } while (SIndex < EIndex);
+}
+
+static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
+ ARM_AM::AMSubMode Mode) {
+ switch (Opc) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA_UPD;
+ case ARM_AM::ib: return ARM::LDMIB_UPD;
+ case ARM_AM::da: return ARM::LDMDA_UPD;
+ case ARM_AM::db: return ARM::LDMDB_UPD;
+ }
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA_UPD;
+ case ARM_AM::ib: return ARM::STMIB_UPD;
+ case ARM_AM::da: return ARM::STMDA_UPD;
+ case ARM_AM::db: return ARM::STMDB_UPD;
+ }
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA_UPD;
+ case ARM_AM::db: return ARM::t2LDMDB_UPD;
+ }
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA_UPD;
+ case ARM_AM::db: return ARM::t2STMDB_UPD;
+ }
+ case ARM::VLDMSIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA_UPD;
+ case ARM_AM::db: return ARM::VLDMSDB_UPD;
+ }
+ case ARM::VLDMDIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA_UPD;
+ case ARM_AM::db: return ARM::VLDMDDB_UPD;
+ }
+ case ARM::VSTMSIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA_UPD;
+ case ARM_AM::db: return ARM::VSTMSDB_UPD;
+ }
+ case ARM::VSTMDIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA_UPD;
+ case ARM_AM::db: return ARM::VSTMDDB_UPD;
+ }
+ }
+}
+
+/// Check if the given instruction increments or decrements a register and
+/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
+/// generated by the instruction are possibly read as well.
+static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
+ bool CheckCPSRDef;
+ int Scale;
+ switch (MI.getOpcode()) {
+ case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
+ case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
+ case ARM::t2SUBri:
+ case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
+ case ARM::t2ADDri:
+ case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
+ case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
+ case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
+ default: return 0;
+ }
+
+ unsigned MIPredReg;
+ if (MI.getOperand(0).getReg() != Reg ||
+ MI.getOperand(1).getReg() != Reg ||
+ getInstrPredicate(&MI, MIPredReg) != Pred ||
+ MIPredReg != PredReg)
+ return 0;
+
+ if (CheckCPSRDef && definesCPSR(&MI))
+ return 0;
+ return MI.getOperand(2).getImm() * Scale;
+}
+
+/// Searches for an increment or decrement of \p Reg before \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+ Offset = 0;
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (MBBI == BeginMBBI)
+ return EndMBBI;
+
+ // Skip debug values.
+ MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
+ while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
+ --PrevMBBI;
+
+ Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
+ return Offset == 0 ? EndMBBI : PrevMBBI;
+}
+
+/// Searches for a increment or decrement of \p Reg after \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+ Offset = 0;
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
+ // Skip debug values.
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if (NextMBBI == EndMBBI)
+ return EndMBBI;
+
+ Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
+ return Offset == 0 ? EndMBBI : NextMBBI;
+}
+
+/// Fold proceeding/trailing inc/dec of base register into the
+/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
+///
+/// stmia rn, <ra, rb, rc>
+/// rn := rn + 4 * 3;
+/// =>
+/// stmia rn!, <ra, rb, rc>
+///
+/// rn := rn - 4 * 3;
+/// ldmia rn, <ra, rb, rc>
+/// =>
+/// ldmdb rn!, <ra, rb, rc>
+bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
+ // Thumb1 is already using updating loads/stores.
+ if (isThumb1) return false;
+
+ const MachineOperand &BaseOP = MI->getOperand(0);
+ unsigned Base = BaseOP.getReg();
+ bool BaseKill = BaseOP.isKill();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ unsigned Opcode = MI->getOpcode();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Can't use an updating ld/st if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
+
+ int Bytes = getLSMultipleTransferSize(MI);
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineBasicBlock::iterator MBBI(MI);
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr
+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
+ ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
+ if (Mode == ARM_AM::ia && Offset == -Bytes) {
+ Mode = ARM_AM::db;
+ } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
+ Mode = ARM_AM::da;
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
+ ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
+ return false;
+ }
+ MBB.erase(MergeInstr);
+
+ unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+
+ // Transfer the rest of operands.
+ for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ MBB.erase(MBBI);
+ return true;
+}
+
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
+ switch (Opc) {
+ case ARM::LDRi12:
+ return ARM::LDR_PRE_IMM;
+ case ARM::STRi12:
+ return ARM::STR_PRE_IMM;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_PRE;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_PRE;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+}
+
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
+ switch (Opc) {
+ case ARM::LDRi12:
+ return ARM::LDR_POST_IMM;
+ case ARM::STRi12:
+ return ARM::STR_POST_IMM;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_POST;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_POST;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+}
+
+/// Fold proceeding/trailing inc/dec of base register into the
+/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
+bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
+ // Thumb1 doesn't have updating LDR/STR.
+ // FIXME: Use LDM/STM with single register instead.
+ if (isThumb1) return false;
+
+ unsigned Base = getLoadStoreBaseOp(*MI).getReg();
+ bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
+ unsigned Opcode = MI->getOpcode();
+ DebugLoc DL = MI->getDebugLoc();
+ bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
+ bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
+ if (isi32Load(Opcode) || isi32Store(Opcode))
+ if (MI->getOperand(2).getImm() != 0)
+ return false;
+ if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
+ return false;
+
+ // Can't do the merge if the destination register is the same as the would-be
+ // writeback register.
+ if (MI->getOperand(0).getReg() == Base)
+ return false;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ int Bytes = getLSMultipleTransferSize(MI);
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineBasicBlock::iterator MBBI(MI);
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr
+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
+ unsigned NewOpc;
+ if (!isAM5 && Offset == Bytes) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ } else if (Offset == -Bytes) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (Offset == Bytes) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ } else if (!isAM5 && Offset == -Bytes) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ } else
+ return false;
+ }
+ MBB.erase(MergeInstr);
+
+ ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
+
+ bool isLd = isLoadSingle(Opcode);
+ if (isAM5) {
+ // VLDM[SD]_UPD, VSTM[SD]_UPD
+ // (There are no base-updating versions of VLDR/VSTR instructions, but the
+ // updating load/store-multiple instructions can be used with only one
+ // register.)
+ MachineOperand &MO = MI->getOperand(0);
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(isLd ? BaseKill : false))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
+ getKillRegState(MO.isKill())));
+ } else if (isLd) {
+ if (isAM2) {
+ // LDR_PRE, LDR_POST
+ if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
+ }
+ } else {
+ // t2LDR_PRE, t2LDR_POST
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ } else {
+ MachineOperand &MO = MI->getOperand(0);
+ // FIXME: post-indexed stores use am2offset_imm, which still encodes
+ // the vestigal zero-reg offset register. When that's fixed, this clause
+ // can be removed entirely.
+ if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ // STR_PRE, STR_POST
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
+ } else {
+ // t2STR_PRE, t2STR_POST
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ }
+ MBB.erase(MBBI);
+
+ return true;
+}
+
+bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
+ "Must have t2STRDi8 or t2LDRDi8");
+ if (MI.getOperand(3).getImm() != 0)
+ return false;
+
+ // Behaviour for writeback is undefined if base register is the same as one
+ // of the others.
+ const MachineOperand &BaseOp = MI.getOperand(2);
+ unsigned Base = BaseOp.getReg();
+ const MachineOperand &Reg0Op = MI.getOperand(0);
+ const MachineOperand &Reg1Op = MI.getOperand(1);
+ if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
+ return false;
+
+ unsigned PredReg;
+ ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
+ MachineBasicBlock::iterator MBBI(MI);
+ MachineBasicBlock &MBB = *MI.getParent();
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
+ PredReg, Offset);
+ unsigned NewOpc;
+ if (Offset == 8 || Offset == -8) {
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (Offset == 8 || Offset == -8) {
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
+ } else
+ return false;
+ }
+ MBB.erase(MergeInstr);
+
+ DebugLoc DL = MI.getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
+ if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
+ MIB.addOperand(Reg0Op).addOperand(Reg1Op)
+ .addReg(BaseOp.getReg(), RegState::Define);
+ } else {
+ assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
+ MIB.addReg(BaseOp.getReg(), RegState::Define)
+ .addOperand(Reg0Op).addOperand(Reg1Op);
+ }
+ MIB.addReg(BaseOp.getReg(), RegState::Kill)
+ .addImm(Offset).addImm(Pred).addReg(PredReg);
+ assert(TII->get(Opcode).getNumOperands() == 6 &&
+ TII->get(NewOpc).getNumOperands() == 7 &&
+ "Unexpected number of operands in Opcode specification.");
+
+ // Transfer implicit operands.
+ for (const MachineOperand &MO : MI.implicit_operands())
+ MIB.addOperand(MO);
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MBB.erase(MBBI);
+ return true;
+}
+
+/// Returns true if instruction is a memory operation that this pass is capable
+/// of operating on.
+static bool isMemoryOp(const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ break;
+ default:
+ return false;
+ }
+ if (!MI.getOperand(1).isReg())
+ return false;
+
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand &MMO = **MI.memoperands_begin();
+
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO.isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO.getAlignment() < 4)
+ return false;
+
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI.getOperand(1).isUndef())
+ return false;
+
+ return true;
+}
+
+static void InsertLDR_STR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int Offset, bool isDef,
+ DebugLoc DL, unsigned NewOpc,
+ unsigned Reg, bool RegDeadKill, bool RegUndef,
+ unsigned BaseReg, bool BaseKill, bool BaseUndef,
+ bool OffKill, bool OffUndef,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const TargetInstrInfo *TII, bool isT2) {
+ if (isDef) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+}
+
+bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = &*MBBI;
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
+ return false;
+
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ unsigned BaseReg = BaseOp.getReg();
+ unsigned EvenReg = MI->getOperand(0).getReg();
+ unsigned OddReg = MI->getOperand(1).getReg();
+ unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
+ unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
+
+ // ARM errata 602117: LDRD with base in list may result in incorrect base
+ // register when interrupted or faulted.
+ bool Errata602117 = EvenReg == BaseReg &&
+ (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
+ // ARM LDRD/STRD needs consecutive registers.
+ bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
+ (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
+
+ if (!Errata602117 && !NonConsecutiveRegs)
+ return false;
+
+ bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
+ bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
+ bool EvenDeadKill = isLd ?
+ MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+ bool EvenUndef = MI->getOperand(0).isUndef();
+ bool OddDeadKill = isLd ?
+ MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+ bool OddUndef = MI->getOperand(1).isUndef();
+ bool BaseKill = BaseOp.isKill();
+ bool BaseUndef = BaseOp.isUndef();
+ bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
+ bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
+ int OffImm = getMemoryOpOffset(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+
+ if (OddRegNum > EvenRegNum && OffImm == 0) {
+ // Ascending register numbers and no offset. It's safe to change it to a
+ // ldm or stm.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
+ : (isT2 ? ARM::t2STMIA : ARM::STMIA);
+ if (isLd) {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
+ .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+ ++NumLDRD2LDM;
+ } else {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg,
+ getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
+ .addReg(OddReg,
+ getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
+ ++NumSTRD2STM;
+ }
+ } else {
+ // Split into two instructions.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
+ // so adjust and use t2LDRi12 here for that.
+ unsigned NewOpc2 = (isLd)
+ ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ DebugLoc dl = MBBI->getDebugLoc();
+ // If this is a load and base register is killed, it may have been
+ // re-defed by the load, make sure the first load does not clobber it.
+ if (isLd &&
+ (BaseKill || OffKill) &&
+ (TRI->regsOverlap(EvenReg, BaseReg))) {
+ assert(!TRI->regsOverlap(OddReg, BaseReg));
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
+ OddReg, OddDeadKill, false,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, false,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
+ } else {
+ if (OddReg == EvenReg && EvenDeadKill) {
+ // If the two source operands are the same, the kill marker is
+ // probably on the first one. e.g.
+ // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+ EvenDeadKill = false;
+ OddDeadKill = true;
+ }
+ // Never kill the base register in the first instruction.
+ if (EvenReg == BaseReg)
+ EvenDeadKill = false;
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, EvenUndef,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
+ OddReg, OddDeadKill, OddUndef,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
+ }
+ if (isLd)
+ ++NumLDRD2LDR;
+ else
+ ++NumSTRD2STR;
+ }
+
+ MBBI = MBB.erase(MBBI);
+ return true;
+}
+
+/// An optimization pass to turn multiple LDR / STR ops of the same base and
+/// incrementing offset into LDM / STM ops.
+bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
+ MemOpQueue MemOps;
+ unsigned CurrBase = 0;
+ unsigned CurrOpc = ~0u;
+ ARMCC::CondCodes CurrPred = ARMCC::AL;
+ unsigned Position = 0;
+ assert(Candidates.size() == 0);
+ assert(MergeBaseCandidates.size() == 0);
+ LiveRegsValid = false;
+
+ for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
+ I = MBBI) {
+ // The instruction in front of the iterator is the one we look at.
+ MBBI = std::prev(I);
+ if (FixInvalidRegPairOp(MBB, MBBI))
+ continue;
+ ++Position;
+
+ if (isMemoryOp(*MBBI)) {
+ unsigned Opcode = MBBI->getOpcode();
+ const MachineOperand &MO = MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
+ int Offset = getMemoryOpOffset(MBBI);
+ if (CurrBase == 0) {
+ // Start of a new chain.
+ CurrBase = Base;
+ CurrOpc = Opcode;
+ CurrPred = Pred;
+ MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
+ continue;
+ }
+ // Note: No need to match PredReg in the next if.
+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
+ // Watch out for:
+ // r4 := ldr [r0, #8]
+ // r4 := ldr [r0, #4]
+ // or
+ // r0 := ldr [r0]
+ // If a load overrides the base register or a register loaded by
+ // another load in our chain, we cannot take this instruction.
+ bool Overlap = false;
+ if (isLoadSingle(Opcode)) {
+ Overlap = (Base == Reg);
+ if (!Overlap) {
+ for (const MemOpQueueEntry &E : MemOps) {
+ if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
+ Overlap = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!Overlap) {
+ // Check offset and sort memory operation into the current chain.
+ if (Offset > MemOps.back().Offset) {
+ MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
+ continue;
+ } else {
+ MemOpQueue::iterator MI, ME;
+ for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
+ if (Offset < MI->Offset) {
+ // Found a place to insert.
+ break;
+ }
+ if (Offset == MI->Offset) {
+ // Collision, abort.
+ MI = ME;
+ break;
+ }
+ }
+ if (MI != MemOps.end()) {
+ MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
+ continue;
+ }
+ }
+ }
+ }
+
+ // Don't advance the iterator; The op will start a new chain next.
+ MBBI = I;
+ --Position;
+ // Fallthrough to look into existing chain.
+ } else if (MBBI->isDebugValue()) {
+ continue;
+ } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
+ MBBI->getOpcode() == ARM::t2STRDi8) {
+ // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
+ // remember them because we may still be able to merge add/sub into them.
+ MergeBaseCandidates.push_back(MBBI);
+ }
+
+
+ // If we are here then the chain is broken; Extract candidates for a merge.
+ if (MemOps.size() > 0) {
+ FormCandidates(MemOps);
+ // Reset for the next chain.
+ CurrBase = 0;
+ CurrOpc = ~0u;
+ CurrPred = ARMCC::AL;
+ MemOps.clear();
+ }
+ }
+ if (MemOps.size() > 0)
+ FormCandidates(MemOps);
+
+ // Sort candidates so they get processed from end to begin of the basic
+ // block later; This is necessary for liveness calculation.
+ auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
+ return M0->InsertPos < M1->InsertPos;
+ };
+ std::sort(Candidates.begin(), Candidates.end(), LessThan);
+
+ // Go through list of candidates and merge.
+ bool Changed = false;
+ for (const MergeCandidate *Candidate : Candidates) {
+ if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
+ MachineInstr *Merged = MergeOpsUpdate(*Candidate);
+ // Merge preceding/trailing base inc/dec into the merged op.
+ if (Merged) {
+ Changed = true;
+ unsigned Opcode = Merged->getOpcode();
+ if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
+ MergeBaseUpdateLSDouble(*Merged);
+ else
+ MergeBaseUpdateLSMultiple(Merged);
+ } else {
+ for (MachineInstr *MI : Candidate->Instrs) {
+ if (MergeBaseUpdateLoadStore(MI))
+ Changed = true;
+ }
+ }
+ } else {
+ assert(Candidate->Instrs.size() == 1);
+ if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
+ Changed = true;
+ }
+ }
+ Candidates.clear();
+ // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
+ for (MachineInstr *MI : MergeBaseCandidates)
+ MergeBaseUpdateLSDouble(*MI);
+ MergeBaseCandidates.clear();
+
+ return Changed;
+}
+
+/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
+/// into the preceding stack restore so it directly restore the value of LR
+/// into pc.
+/// ldmfd sp!, {..., lr}
+/// bx lr
+/// or
+/// ldmfd sp!, {..., lr}
+/// mov pc, lr
+/// =>
+/// ldmfd sp!, {..., pc}
+bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ // Thumb1 LDM doesn't allow high registers.
+ if (isThumb1) return false;
+ if (MBB.empty()) return false;
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ if (MBBI != MBB.begin() &&
+ (MBBI->getOpcode() == ARM::BX_RET ||
+ MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::MOVPCLR)) {
+ MachineBasicBlock::iterator PrevI = std::prev(MBBI);
+ // Ignore any DBG_VALUE instructions.
+ while (PrevI->isDebugValue() && PrevI != MBB.begin())
+ --PrevI;
+ MachineInstr *PrevMI = PrevI;
+ unsigned Opcode = PrevMI->getOpcode();
+ if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
+ Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
+ Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
+ MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+ if (MO.getReg() != ARM::LR)
+ return false;
+ unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
+ assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
+ Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
+ PrevMI->setDesc(TII->get(NewOpc));
+ MO.setReg(ARM::PC);
+ PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
+ MBB.erase(MBBI);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ if (MBBI == MBB.begin() || MBBI == MBB.end() ||
+ MBBI->getOpcode() != ARM::tBX_RET)
+ return false;
+
+ MachineBasicBlock::iterator Prev = MBBI;
+ --Prev;
+ if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
+ return false;
+
+ for (auto Use : Prev->uses())
+ if (Use.isKill()) {
+ AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
+ .addReg(Use.getReg(), RegState::Kill))
+ .copyImplicitOps(&*MBBI);
+ MBB.erase(MBBI);
+ MBB.erase(Prev);
+ return true;
+ }
+
+ llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
+}
+
+bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ MF = &Fn;
+ STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ TL = STI->getTargetLowering();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
+
+ RegClassInfoValid = false;
+ isThumb2 = AFI->isThumb2Function();
+ isThumb1 = AFI->isThumbFunction() && !isThumb2;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= LoadStoreMultipleOpti(MBB);
+ if (STI->hasV5TOps())
+ Modified |= MergeReturnIntoLDM(MBB);
+ if (isThumb1)
+ Modified |= CombineMovBx(MBB);
+ }
+
+ Allocator.DestroyAll();
+ return Modified;
+}
+
+namespace llvm {
+void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+}
+
+#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
+ "ARM pre- register allocation load / store optimization pass"
+
+namespace {
+ /// Pre- register allocation pass that move load / stores from consecutive
+ /// locations close to make it more likely they will be combined later.
+ struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
+ static char ID;
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
+ initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ const DataLayout *TD;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ MachineRegisterInfo *MRI;
+ MachineFunction *MF;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ const char *getPassName() const override {
+ return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
+ }
+
+ private:
+ bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ int &Offset,
+ unsigned &PredReg, ARMCC::CondCodes &Pred,
+ bool &isT2);
+ bool RescheduleOps(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineInstr *> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap);
+ bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+ };
+ char ARMPreAllocLoadStoreOpt::ID = 0;
+}
+
+INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
+ ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
+
+bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ TD = &Fn.getDataLayout();
+ STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ MF = &Fn;
+
+ bool Modified = false;
+ for (MachineBasicBlock &MFI : Fn)
+ Modified |= RescheduleLoadStoreInstrs(&MFI);
+
+ return Modified;
+}
+
+static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallPtrSetImpl<MachineInstr*> &MemOps,
+ SmallSet<unsigned, 4> &MemRegs,
+ const TargetRegisterInfo *TRI) {
+ // Are there stores / loads / calls between them?
+ // FIXME: This is overly conservative. We should make use of alias information
+ // some day.
+ SmallSet<unsigned, 4> AddedRegPressure;
+ while (++I != E) {
+ if (I->isDebugValue() || MemOps.count(&*I))
+ continue;
+ if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
+ return false;
+ if (isLd && I->mayStore())
+ return false;
+ if (!isLd) {
+ if (I->mayLoad())
+ return false;
+ // It's not safe to move the first 'str' down.
+ // str r1, [r0]
+ // strh r5, [r0]
+ // str r4, [r0, #+4]
+ if (I->mayStore())
+ return false;
+ }
+ for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
+ MachineOperand &MO = I->getOperand(j);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef() && TRI->regsOverlap(Reg, Base))
+ return false;
+ if (Reg != Base && !MemRegs.count(Reg))
+ AddedRegPressure.insert(Reg);
+ }
+ }
+
+ // Estimate register pressure increase due to the transformation.
+ if (MemRegs.size() <= 4)
+ // Ok if we are moving small number of instructions.
+ return true;
+ return AddedRegPressure.size() <= MemRegs.size() * 2;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
+ DebugLoc &dl, unsigned &NewOpc,
+ unsigned &FirstReg,
+ unsigned &SecondReg,
+ unsigned &BaseReg, int &Offset,
+ unsigned &PredReg,
+ ARMCC::CondCodes &Pred,
+ bool &isT2) {
+ // Make sure we're allowed to generate LDRD/STRD.
+ if (!STI->hasV5TEOps())
+ return false;
+
+ // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
+ unsigned Scale = 1;
+ unsigned Opcode = Op0->getOpcode();
+ if (Opcode == ARM::LDRi12) {
+ NewOpc = ARM::LDRD;
+ } else if (Opcode == ARM::STRi12) {
+ NewOpc = ARM::STRD;
+ } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
+ NewOpc = ARM::t2LDRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
+ NewOpc = ARM::t2STRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else {
+ return false;
+ }
+
+ // Make sure the base address satisfies i64 ld / st alignment requirement.
+ // At the moment, we ignore the memoryoperand's value.
+ // If we want to use AliasAnalysis, we should check it accordingly.
+ if (!Op0->hasOneMemOperand() ||
+ (*Op0->memoperands_begin())->isVolatile())
+ return false;
+
+ unsigned Align = (*Op0->memoperands_begin())->getAlignment();
+ const Function *Func = MF->getFunction();
+ unsigned ReqAlign = STI->hasV6Ops()
+ ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
+ : 8; // Pre-v6 need 8-byte align
+ if (Align < ReqAlign)
+ return false;
+
+ // Then make sure the immediate offset fits.
+ int OffImm = getMemoryOpOffset(Op0);
+ if (isT2) {
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
+ return false;
+ Offset = OffImm;
+ } else {
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (OffImm < 0) {
+ AddSub = ARM_AM::sub;
+ OffImm = - OffImm;
+ }
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm & (Scale-1)))
+ return false;
+ Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
+ }
+ FirstReg = Op0->getOperand(0).getReg();
+ SecondReg = Op1->getOperand(0).getReg();
+ if (FirstReg == SecondReg)
+ return false;
+ BaseReg = Op0->getOperand(1).getReg();
+ Pred = getInstrPredicate(Op0, PredReg);
+ dl = Op0->getDebugLoc();
+ return true;
+}
+
+bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineInstr *> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
+ bool RetVal = false;
+
+ // Sort by offset (in reverse order).
+ std::sort(Ops.begin(), Ops.end(),
+ [](const MachineInstr *LHS, const MachineInstr *RHS) {
+ int LOffset = getMemoryOpOffset(LHS);
+ int ROffset = getMemoryOpOffset(RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ });
+
+ // The loads / stores of the same base are in order. Scan them from first to
+ // last and check for the following:
+ // 1. Any def of base.
+ // 2. Any gaps.
+ while (Ops.size() > 1) {
+ unsigned FirstLoc = ~0U;
+ unsigned LastLoc = 0;
+ MachineInstr *FirstOp = nullptr;
+ MachineInstr *LastOp = nullptr;
+ int LastOffset = 0;
+ unsigned LastOpcode = 0;
+ unsigned LastBytes = 0;
+ unsigned NumMove = 0;
+ for (int i = Ops.size() - 1; i >= 0; --i) {
+ MachineInstr *Op = Ops[i];
+ unsigned Loc = MI2LocMap[Op];
+ if (Loc <= FirstLoc) {
+ FirstLoc = Loc;
+ FirstOp = Op;
+ }
+ if (Loc >= LastLoc) {
+ LastLoc = Loc;
+ LastOp = Op;
+ }
+
+ unsigned LSMOpcode
+ = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
+ if (LastOpcode && LSMOpcode != LastOpcode)
+ break;
+
+ int Offset = getMemoryOpOffset(Op);
+ unsigned Bytes = getLSMultipleTransferSize(Op);
+ if (LastBytes) {
+ if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
+ break;
+ }
+ LastOffset = Offset;
+ LastBytes = Bytes;
+ LastOpcode = LSMOpcode;
+ if (++NumMove == 8) // FIXME: Tune this limit.
+ break;
+ }
+
+ if (NumMove <= 1)
+ Ops.pop_back();
+ else {
+ SmallPtrSet<MachineInstr*, 4> MemOps;
+ SmallSet<unsigned, 4> MemRegs;
+ for (int i = NumMove-1; i >= 0; --i) {
+ MemOps.insert(Ops[i]);
+ MemRegs.insert(Ops[i]->getOperand(0).getReg());
+ }
+
+ // Be conservative, if the instructions are too far apart, don't
+ // move them. We want to limit the increase of register pressure.
+ bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
+ if (DoMove)
+ DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
+ MemOps, MemRegs, TRI);
+ if (!DoMove) {
+ for (unsigned i = 0; i != NumMove; ++i)
+ Ops.pop_back();
+ } else {
+ // This is the new location for the loads / stores.
+ MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
+ while (InsertPos != MBB->end()
+ && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
+ ++InsertPos;
+
+ // If we are moving a pair of loads / stores, see if it makes sense
+ // to try to allocate a pair of registers that can form register pairs.
+ MachineInstr *Op0 = Ops.back();
+ MachineInstr *Op1 = Ops[Ops.size()-2];
+ unsigned FirstReg = 0, SecondReg = 0;
+ unsigned BaseReg = 0, PredReg = 0;
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ bool isT2 = false;
+ unsigned NewOpc = 0;
+ int Offset = 0;
+ DebugLoc dl;
+ if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
+ FirstReg, SecondReg, BaseReg,
+ Offset, PredReg, Pred, isT2)) {
+ Ops.pop_back();
+ Ops.pop_back();
+
+ const MCInstrDesc &MCID = TII->get(NewOpc);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
+ MRI->constrainRegClass(FirstReg, TRC);
+ MRI->constrainRegClass(SecondReg, TRC);
+
+ // Form the pair instruction.
+ if (isLd) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
+ .addReg(FirstReg, RegState::Define)
+ .addReg(SecondReg, RegState::Define)
+ .addReg(BaseReg);
+ // FIXME: We're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always by reg0 since we're transforming LDRi12s.
+ if (!isT2)
+ MIB.addReg(0);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
+ DEBUG(dbgs() << "Formed " << *MIB << "\n");
+ ++NumLDRDFormed;
+ } else {
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
+ .addReg(FirstReg)
+ .addReg(SecondReg)
+ .addReg(BaseReg);
+ // FIXME: We're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always by reg0 since we're transforming STRi12s.
+ if (!isT2)
+ MIB.addReg(0);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
+ DEBUG(dbgs() << "Formed " << *MIB << "\n");
+ ++NumSTRDFormed;
+ }
+ MBB->erase(Op0);
+ MBB->erase(Op1);
+
+ if (!isT2) {
+ // Add register allocation hints to form register pairs.
+ MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
+ MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
+ }
+ } else {
+ for (unsigned i = 0; i != NumMove; ++i) {
+ MachineInstr *Op = Ops.back();
+ Ops.pop_back();
+ MBB->splice(InsertPos, MBB, Op);
+ }
+ }
+
+ NumLdStMoved += NumMove;
+ RetVal = true;
+ }
+ }
+ }
+
+ return RetVal;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+ bool RetVal = false;
+
+ DenseMap<MachineInstr*, unsigned> MI2LocMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
+ SmallVector<unsigned, 4> LdBases;
+ SmallVector<unsigned, 4> StBases;
+
+ unsigned Loc = 0;
+ MachineBasicBlock::iterator MBBI = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+ while (MBBI != E) {
+ for (; MBBI != E; ++MBBI) {
+ MachineInstr *MI = MBBI;
+ if (MI->isCall() || MI->isTerminator()) {
+ // Stop at barriers.
+ ++MBBI;
+ break;
+ }
+
+ if (!MI->isDebugValue())
+ MI2LocMap[MI] = ++Loc;
+
+ if (!isMemoryOp(*MI))
+ continue;
+ unsigned PredReg = 0;
+ if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ continue;
+
+ int Opc = MI->getOpcode();
+ bool isLd = isLoadSingle(Opc);
+ unsigned Base = MI->getOperand(1).getReg();
+ int Offset = getMemoryOpOffset(MI);
+
+ bool StopHere = false;
+ if (isLd) {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2LdsMap.find(Base);
+ if (BI != Base2LdsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ Base2LdsMap[Base].push_back(MI);
+ LdBases.push_back(Base);
+ }
+ } else {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2StsMap.find(Base);
+ if (BI != Base2StsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ Base2StsMap[Base].push_back(MI);
+ StBases.push_back(Base);
+ }
+ }
+
+ if (StopHere) {
+ // Found a duplicate (a base+offset combination that's seen earlier).
+ // Backtrack.
+ --Loc;
+ break;
+ }
+ }
+
+ // Re-schedule loads.
+ for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
+ unsigned Base = LdBases[i];
+ SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
+ if (Lds.size() > 1)
+ RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+ }
+
+ // Re-schedule stores.
+ for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
+ unsigned Base = StBases[i];
+ SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
+ if (Sts.size() > 1)
+ RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+ }
+
+ if (MBBI != E) {
+ Base2LdsMap.clear();
+ Base2StsMap.clear();
+ LdBases.clear();
+ StBases.clear();
+ }
+ }
+
+ return RetVal;
+}
+
+
+/// Returns an instance of the load / store optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
+ if (PreAlloc)
+ return new ARMPreAllocLoadStoreOpt();
+ return new ARMLoadStoreOpt();
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
new file mode 100644
index 0000000..a2aca2d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -0,0 +1,163 @@
+//===-- ARMMCInstLower.cpp - Convert ARM MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower ARM MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAsmPrinter.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+using namespace llvm;
+
+
+MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
+ const MCSymbol *Symbol) {
+ const MCExpr *Expr;
+ unsigned Option = MO.getTargetFlags() & ARMII::MO_OPTION_MASK;
+ switch (Option) {
+ default: {
+ Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ switch (Option) {
+ default: llvm_unreachable("Unknown target flag on symbol operand");
+ case ARMII::MO_NO_FLAG:
+ break;
+ case ARMII::MO_LO16:
+ Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ Expr = ARMMCExpr::createLower16(Expr, OutContext);
+ break;
+ case ARMII::MO_HI16:
+ Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ Expr = ARMMCExpr::createUpper16(Expr, OutContext);
+ break;
+ }
+ break;
+ }
+
+ case ARMII::MO_PLT:
+ Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_PLT,
+ OutContext);
+ break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(MO.getOffset(),
+ OutContext),
+ OutContext);
+ return MCOperand::createExpr(Expr);
+
+}
+
+bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all non-CPSR implicit register operands.
+ if (MO.isImplicit() && MO.getReg() != ARM::CPSR)
+ return false;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::createReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress: {
+ MCOp = GetSymbolRef(MO,
+ GetARMGVSymbol(MO.getGlobal(), MO.getTargetFlags()));
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO,
+ GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::createFPImm(Val.convertToDouble());
+ break;
+ }
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers.
+ return false;
+ }
+ return true;
+}
+
+void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ // In the MC layer, we keep modified immediates in their encoded form
+ bool EncodeImms = false;
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::MOVi:
+ case ARM::MVNi:
+ case ARM::CMPri:
+ case ARM::CMNri:
+ case ARM::TSTri:
+ case ARM::TEQri:
+ case ARM::MSRi:
+ case ARM::ADCri:
+ case ARM::ADDri:
+ case ARM::ADDSri:
+ case ARM::SBCri:
+ case ARM::SUBri:
+ case ARM::SUBSri:
+ case ARM::ANDri:
+ case ARM::ORRri:
+ case ARM::EORri:
+ case ARM::BICri:
+ case ARM::RSBri:
+ case ARM::RSBSri:
+ case ARM::RSCri:
+ EncodeImms = true;
+ break;
+ }
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ if (AP.lowerOperand(MO, MCOp)) {
+ if (MCOp.isImm() && EncodeImms) {
+ int32_t Enc = ARM_AM::getSOImmVal(MCOp.getImm());
+ if (Enc != -1)
+ MCOp.setImm(Enc);
+ }
+ OutMI.addOperand(MCOp);
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
new file mode 100644
index 0000000..ac0330f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -0,0 +1,23 @@
+//===-- ARMMachineFunctionInfo.cpp - ARM machine function info ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void ARMFunctionInfo::anchor() { }
+
+ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
+ : isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()),
+ hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()),
+ StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false),
+ RestoreSPFromFP(false), LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
new file mode 100644
index 0000000..d644797
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -0,0 +1,226 @@
+//===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ARM-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
+
+#include "ARMSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM-specific information for each MachineFunction.
+class ARMFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// isThumb - True if this function is compiled under Thumb mode.
+ /// Used to initialized Align, so must precede it.
+ bool isThumb;
+
+ /// hasThumb2 - True if the target architecture supports Thumb2. Do not use
+ /// to determine if function is compiled under Thumb mode, for that use
+ /// 'isThumb'.
+ bool hasThumb2;
+
+ /// StByValParamsPadding - For parameter that is split between
+ /// GPRs and memory; while recovering GPRs part, when
+ /// StackAlignment > 4, and GPRs-part-size mod StackAlignment != 0,
+ /// we need to insert gap before parameter start address. It allows to
+ /// "attach" GPR-part to the part that was passed via stack.
+ unsigned StByValParamsPadding;
+
+ /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+ ///
+ unsigned ArgRegsSaveSize;
+
+ /// ReturnRegsCount - Number of registers used up in the return.
+ unsigned ReturnRegsCount;
+
+ /// HasStackFrame - True if this function has a stack frame. Set by
+ /// determineCalleeSaves().
+ bool HasStackFrame;
+
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
+ /// LRSpilledForFarJump - True if the LR register has been for spilled to
+ /// enable far jump.
+ bool LRSpilledForFarJump;
+
+ /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
+ /// spill stack offset.
+ unsigned FramePtrSpillOffset;
+
+ /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
+ /// register spills areas. For Mac OS X:
+ ///
+ /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+ /// --------------------------------------------
+ /// GPR callee-saved (2) : r8, r10, r11
+ /// --------------------------------------------
+ /// DPR callee-saved : d8 - d15
+ ///
+ /// Also see AlignedDPRCSRegs below. Not all D-regs need to go in area 3.
+ /// Some may be spilled after the stack has been realigned.
+ unsigned GPRCS1Offset;
+ unsigned GPRCS2Offset;
+ unsigned DPRCSOffset;
+
+ /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
+ /// areas.
+ unsigned GPRCS1Size;
+ unsigned GPRCS2Size;
+ unsigned DPRCSAlignGapSize;
+ unsigned DPRCSSize;
+
+ /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
+ /// the aligned portion of the stack frame. This is always a contiguous
+ /// sequence of D-registers starting from d8.
+ ///
+ /// We do not keep track of the frame indices used for these registers - they
+ /// behave like any other frame index in the aligned stack frame. These
+ /// registers also aren't included in DPRCSSize above.
+ unsigned NumAlignedDPRCS2Regs;
+
+ unsigned PICLabelUId;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// HasITBlocks - True if IT blocks have been inserted.
+ bool HasITBlocks;
+
+ /// CPEClones - Track constant pool entries clones created by Constant Island
+ /// pass.
+ DenseMap<unsigned, unsigned> CPEClones;
+
+ /// ArgumentStackSize - amount of bytes on stack consumed by the arguments
+ /// being passed on the stack
+ unsigned ArgumentStackSize;
+
+ /// CoalescedWeights - mapping of basic blocks to the rolling counter of
+ /// coalesced weights.
+ DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights;
+
+public:
+ ARMFunctionInfo() :
+ isThumb(false),
+ hasThumb2(false),
+ ArgRegsSaveSize(0), ReturnRegsCount(0), HasStackFrame(false),
+ RestoreSPFromFP(false),
+ LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
+ NumAlignedDPRCS2Regs(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
+
+ explicit ARMFunctionInfo(MachineFunction &MF);
+
+ bool isThumbFunction() const { return isThumb; }
+ bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
+ bool isThumb2Function() const { return isThumb && hasThumb2; }
+
+ unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; }
+ void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; }
+
+ unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
+ void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
+
+ unsigned getReturnRegsCount() const { return ReturnRegsCount; }
+ void setReturnRegsCount(unsigned s) { ReturnRegsCount = s; }
+
+ bool hasStackFrame() const { return HasStackFrame; }
+ void setHasStackFrame(bool s) { HasStackFrame = s; }
+
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
+ bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
+ void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
+
+ unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
+ void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+
+ unsigned getNumAlignedDPRCS2Regs() const { return NumAlignedDPRCS2Regs; }
+ void setNumAlignedDPRCS2Regs(unsigned n) { NumAlignedDPRCS2Regs = n; }
+
+ unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
+ unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
+ unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
+
+ void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
+ void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
+ void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
+ unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; }
+ unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
+
+ void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
+ void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; }
+ void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
+
+ unsigned getArgumentStackSize() const { return ArgumentStackSize; }
+ void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
+
+ void initPICLabelUId(unsigned UId) {
+ PICLabelUId = UId;
+ }
+
+ unsigned getNumPICLabels() const {
+ return PICLabelUId;
+ }
+
+ unsigned createPICLabelUId() {
+ return PICLabelUId++;
+ }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ bool hasITBlocks() const { return HasITBlocks; }
+ void setHasITBlocks(bool h) { HasITBlocks = h; }
+
+ void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
+ if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
+ llvm_unreachable("Duplicate entries!");
+ }
+
+ unsigned getOriginalCPIdx(unsigned CloneIdx) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = CPEClones.find(CloneIdx);
+ if (I != CPEClones.end())
+ return I->second;
+ else
+ return -1U;
+ }
+
+ DenseMap<const MachineBasicBlock*, unsigned>::iterator getCoalescedWeight(
+ MachineBasicBlock* MBB) {
+ auto It = CoalescedWeights.find(MBB);
+ if (It == CoalescedWeights.end()) {
+ It = CoalescedWeights.insert(std::make_pair(MBB, 0)).first;
+ }
+ return It;
+ }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
new file mode 100644
index 0000000..30baf42
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -0,0 +1,99 @@
+//===-- ARMOptimizeBarriersPass - two DMBs without a memory access in between,
+//removed one -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===------------------------------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "double barriers"
+
+STATISTIC(NumDMBsRemoved, "Number of DMBs removed");
+
+namespace {
+class ARMOptimizeBarriersPass : public MachineFunctionPass {
+public:
+ static char ID;
+ ARMOptimizeBarriersPass() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ const char *getPassName() const override {
+ return "optimise barriers pass";
+ }
+};
+char ARMOptimizeBarriersPass::ID = 0;
+}
+
+// Returns whether the instruction can safely move past a DMB instruction
+// The current implementation allows this iif MI does not have any possible
+// memory access
+static bool CanMovePastDMB(const MachineInstr *MI) {
+ return !(MI->mayLoad() ||
+ MI->mayStore() ||
+ MI->hasUnmodeledSideEffects() ||
+ MI->isCall() ||
+ MI->isReturn());
+}
+
+bool ARMOptimizeBarriersPass::runOnMachineFunction(MachineFunction &MF) {
+ // Vector to store the DMBs we will remove after the first iteration
+ std::vector<MachineInstr *> ToRemove;
+ // DMBType is the Imm value of the first operand. It determines whether it's a
+ // DMB ish, dmb sy, dmb osh, etc
+ int64_t DMBType = -1;
+
+ // Find a dmb. If we can move it until the next dmb, tag the second one for
+ // removal
+ for (auto &MBB : MF) {
+ // Will be true when we have seen a DMB, and not seen any instruction since
+ // that cannot move past a DMB
+ bool IsRemovableNextDMB = false;
+ for (auto &MI : MBB) {
+ if (MI.getOpcode() == ARM::DMB) {
+ if (IsRemovableNextDMB) {
+ // If the Imm of this DMB is the same as that of the last DMB, we can
+ // tag this second DMB for removal
+ if (MI.getOperand(0).getImm() == DMBType) {
+ ToRemove.push_back(&MI);
+ } else {
+ // If it has a different DMBType, we cannot remove it, but will scan
+ // for the next DMB, recording this DMB's type as last seen DMB type
+ DMBType = MI.getOperand(0).getImm();
+ }
+ } else {
+ // After we see a DMB, a next one is removable
+ IsRemovableNextDMB = true;
+ DMBType = MI.getOperand(0).getImm();
+ }
+ } else if (!CanMovePastDMB(&MI)) {
+ // If we find an instruction unable to pass past a DMB, a next DMB is
+ // not removable
+ IsRemovableNextDMB = false;
+ }
+ }
+ }
+ // Remove the tagged DMB
+ for (auto MI : ToRemove) {
+ MI->eraseFromParent();
+ ++NumDMBsRemoved;
+ }
+
+ return NumDMBsRemoved > 0;
+}
+
+/// createARMOptimizeBarriersPass - Returns an instance of the remove double
+/// barriers
+/// pass.
+FunctionPass *llvm::createARMOptimizeBarriersPass() {
+ return new ARMOptimizeBarriersPass();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
new file mode 100644
index 0000000..3ff0bee
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
@@ -0,0 +1,6591 @@
+//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle using neon instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMPERFECTSHUFFLE_H
+#define LLVM_LIB_TARGET_ARM_ARMPERFECTSHUFFLE_H
+
+// 31 entries have cost 0
+// 242 entries have cost 1
+// 1447 entries have cost 2
+// 3602 entries have cost 3
+// 1237 entries have cost 4
+// 2 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS
+ 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
+ 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
+ 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
+ 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
+ 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
+ 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS
+ 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
+ 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS
+ 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
+ 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
+ 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
+ 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
+ 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
+ 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS
+ 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
+ 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
+ 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
+ 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
+ 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
+ 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
+ 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS
+ 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
+ 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
+ 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
+ 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
+ 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
+ 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
+ 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
+ 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
+ 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
+ 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
+ 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
+ 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
+ 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
+ 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
+ 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
+ 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
+ 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
+ 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
+ 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
+ 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
+ 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
+ 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
+ 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
+ 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
+ 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
+ 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
+ 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
+ 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
+ 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
+ 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
+ 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
+ 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
+ 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
+ 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
+ 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
+ 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
+ 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS
+ 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS
+ 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS
+ 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
+ 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
+ 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
+ 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS
+ 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
+ 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
+ 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
+ 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
+ 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
+ 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
+ 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
+ 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
+ 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
+ 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
+ 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
+ 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
+ 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
+ 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
+ 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
+ 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
+ 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
+ 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
+ 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
+ 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
+ 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
+ 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
+ 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
+ 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
+ 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
+ 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
+ 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
+ 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
+ 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
+ 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
+ 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
+ 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
+ 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
+ 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
+ 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
+ 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
+ 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
+ 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
+ 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
+ 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
+ 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
+ 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
+ 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
+ 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
+ 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
+ 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
+ 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
+ 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
+ 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
+ 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
+ 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
+ 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
+ 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
+ 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
+ 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
+ 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
+ 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
+ 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
+ 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
+ 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
+ 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
+ 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
+ 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
+ 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
+ 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
+ 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
+ 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
+ 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
+ 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS
+ 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
+ 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
+ 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
+ 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
+ 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
+ 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS
+ 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
+ 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
+ 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
+ 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
+ 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
+ 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
+ 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
+ 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
+ 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
+ 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2>
+ 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
+ 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
+ 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
+ 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
+ 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
+ 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
+ 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
+ 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
+ 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
+ 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
+ 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
+ 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
+ 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
+ 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4>
+ 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
+ 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
+ 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS
+ 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS
+ 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
+ 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
+ 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
+ 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
+ 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
+ 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
+ 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
+ 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
+ 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
+ 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
+ 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
+ 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
+ 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
+ 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
+ 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
+ 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
+ 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
+ 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
+ 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
+ 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
+ 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
+ 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
+ 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
+ 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
+ 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS
+ 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
+ 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS
+ 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
+ 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
+ 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
+ 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
+ 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
+ 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
+ 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
+ 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
+ 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
+ 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
+ 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
+ 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
+ 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
+ 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
+ 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
+ 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
+ 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
+ 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
+ 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
+ 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
+ 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
+ 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
+ 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
+ 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
+ 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
+ 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
+ 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
+ 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
+ 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
+ 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
+ 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
+ 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
+ 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
+ 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
+ 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
+ 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4>
+ 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4>
+ 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
+ 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
+ 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
+ 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
+ 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
+ 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
+ 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
+ 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
+ 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
+ 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
+ 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
+ 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
+ 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
+ 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
+ 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5>
+ 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
+ 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
+ 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
+ 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
+ 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
+ 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6>
+ 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
+ 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
+ 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
+ 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
+ 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
+ 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
+ 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
+ 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
+ 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
+ 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
+ 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
+ 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
+ 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
+ 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
+ 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
+ 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
+ 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
+ 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
+ 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
+ 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
+ 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
+ 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
+ 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
+ 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
+ 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
+ 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
+ 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
+ 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
+ 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
+ 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
+ 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS
+ 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
+ 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS
+ 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
+ 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2>
+ 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
+ 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
+ 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
+ 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
+ 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
+ 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
+ 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
+ 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
+ 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
+ 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
+ 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
+ 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
+ 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
+ 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
+ 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
+ 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
+ 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
+ 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
+ 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
+ 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
+ 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
+ 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
+ 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
+ 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
+ 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
+ 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
+ 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
+ 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
+ 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
+ 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
+ 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
+ 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
+ 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
+ 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
+ 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
+ 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
+ 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
+ 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
+ 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
+ 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
+ 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
+ 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
+ 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
+ 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
+ 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
+ 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
+ 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
+ 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS
+ 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS
+ 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
+ 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS
+ 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
+ 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
+ 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
+ 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
+ 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
+ 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
+ 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
+ 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
+ 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
+ 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
+ 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
+ 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
+ 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
+ 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
+ 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
+ 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
+ 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2>
+ 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
+ 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
+ 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
+ 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
+ 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
+ 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
+ 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
+ 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
+ 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
+ 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
+ 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
+ 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
+ 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
+ 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
+ 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
+ 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
+ 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
+ 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
+ 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
+ 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
+ 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
+ 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
+ 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
+ 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5>
+ 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
+ 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5>
+ 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
+ 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
+ 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
+ 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
+ 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
+ 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
+ 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
+ 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
+ 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
+ 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
+ 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
+ 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
+ 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
+ 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
+ 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
+ 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
+ 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
+ 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
+ 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
+ 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
+ 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
+ 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
+ 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
+ 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
+ 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
+ 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
+ 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
+ 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
+ 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
+ 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
+ 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
+ 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
+ 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
+ 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
+ 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
+ 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
+ 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
+ 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
+ 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
+ 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
+ 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
+ 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
+ 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
+ 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
+ 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2>
+ 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
+ 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
+ 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
+ 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
+ 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
+ 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
+ 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
+ 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
+ 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
+ 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
+ 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
+ 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
+ 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
+ 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
+ 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
+ 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
+ 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
+ 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
+ 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
+ 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
+ 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
+ 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
+ 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
+ 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
+ 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
+ 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
+ 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
+ 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
+ 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
+ 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
+ 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
+ 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
+ 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
+ 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
+ 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
+ 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
+ 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
+ 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
+ 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
+ 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
+ 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
+ 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
+ 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
+ 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
+ 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
+ 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
+ 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
+ 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
+ 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
+ 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
+ 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
+ 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
+ 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
+ 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
+ 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
+ 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
+ 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
+ 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
+ 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
+ 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
+ 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
+ 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
+ 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
+ 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
+ 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
+ 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
+ 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
+ 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
+ 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
+ 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
+ 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
+ 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
+ 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
+ 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
+ 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
+ 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
+ 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
+ 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
+ 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
+ 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
+ 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
+ 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
+ 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
+ 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
+ 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
+ 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
+ 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
+ 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
+ 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
+ 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
+ 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
+ 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
+ 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
+ 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
+ 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
+ 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
+ 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
+ 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
+ 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
+ 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
+ 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
+ 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
+ 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
+ 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
+ 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
+ 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
+ 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
+ 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
+ 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
+ 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
+ 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
+ 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
+ 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
+ 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
+ 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
+ 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
+ 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
+ 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
+ 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
+ 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
+ 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
+ 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
+ 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
+ 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
+ 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
+ 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
+ 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
+ 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
+ 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
+ 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
+ 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
+ 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
+ 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
+ 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
+ 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS
+ 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
+ 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS
+ 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
+ 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS
+ 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
+ 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS
+ 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
+ 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
+ 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
+ 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
+ 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
+ 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
+ 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
+ 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
+ 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
+ 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
+ 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
+ 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
+ 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
+ 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
+ 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
+ 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
+ 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
+ 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
+ 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
+ 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
+ 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
+ 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
+ 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
+ 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6>
+ 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
+ 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
+ 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
+ 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
+ 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
+ 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
+ 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
+ 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
+ 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
+ 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
+ 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
+ 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
+ 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
+ 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
+ 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
+ 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
+ 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
+ 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
+ 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
+ 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
+ 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
+ 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
+ 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
+ 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
+ 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
+ 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
+ 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
+ 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
+ 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
+ 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
+ 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
+ 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
+ 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
+ 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
+ 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
+ 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
+ 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
+ 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
+ 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
+ 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
+ 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
+ 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
+ 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
+ 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
+ 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
+ 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
+ 67944550U, // <1,0,3,2>: Cost 1 vrev LHS
+ 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
+ 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
+ 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
+ 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
+ 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
+ 68386972U, // <1,0,3,u>: Cost 1 vrev LHS
+ 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
+ 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
+ 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
+ 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
+ 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
+ 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
+ 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
+ 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
+ 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
+ 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
+ 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
+ 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
+ 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
+ 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
+ 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
+ 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
+ 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
+ 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
+ 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
+ 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
+ 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
+ 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
+ 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
+ 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
+ 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
+ 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
+ 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
+ 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
+ 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
+ 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
+ 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
+ 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
+ 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
+ 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
+ 67985515U, // <1,0,u,2>: Cost 1 vrev LHS
+ 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
+ 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
+ 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
+ 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
+ 68427937U, // <1,0,u,u>: Cost 1 vrev LHS
+ 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
+ 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
+ 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
+ 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
+ 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
+ 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
+ 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
+ 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
+ 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
+ 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
+ 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
+ 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
+ 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
+ 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
+ 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
+ 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
+ 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
+ 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
+ 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
+ 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
+ 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
+ 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
+ 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
+ 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
+ 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
+ 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
+ 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
+ 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
+ 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
+ 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
+ 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
+ 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
+ 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
+ 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
+ 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
+ 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
+ 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
+ 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
+ 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
+ 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
+ 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
+ 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
+ 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
+ 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
+ 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
+ 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
+ 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
+ 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
+ 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
+ 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
+ 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
+ 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
+ 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
+ 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
+ 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
+ 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
+ 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
+ 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
+ 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
+ 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
+ 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
+ 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
+ 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
+ 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
+ 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
+ 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
+ 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
+ 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS
+ 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
+ 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
+ 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
+ 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
+ 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
+ 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
+ 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
+ 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
+ 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
+ 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
+ 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
+ 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
+ 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
+ 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
+ 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
+ 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
+ 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
+ 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
+ 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
+ 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
+ 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
+ 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
+ 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
+ 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
+ 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
+ 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
+ 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
+ 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
+ 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
+ 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
+ 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
+ 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
+ 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
+ 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
+ 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
+ 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
+ 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
+ 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
+ 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
+ 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
+ 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
+ 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
+ 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
+ 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
+ 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
+ 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
+ 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
+ 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
+ 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
+ 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
+ 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
+ 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
+ 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
+ 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
+ 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
+ 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
+ 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
+ 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
+ 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
+ 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
+ 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
+ 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
+ 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
+ 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
+ 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
+ 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
+ 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
+ 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
+ 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
+ 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
+ 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
+ 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
+ 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
+ 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
+ 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
+ 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
+ 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
+ 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
+ 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
+ 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
+ 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
+ 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
+ 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
+ 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
+ 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
+ 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
+ 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
+ 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
+ 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
+ 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
+ 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
+ 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
+ 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
+ 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
+ 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
+ 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
+ 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
+ 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
+ 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
+ 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
+ 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
+ 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
+ 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
+ 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
+ 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
+ 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
+ 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
+ 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
+ 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
+ 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
+ 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
+ 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
+ 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
+ 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
+ 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
+ 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
+ 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
+ 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
+ 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
+ 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
+ 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
+ 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
+ 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
+ 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
+ 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
+ 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
+ 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
+ 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
+ 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
+ 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
+ 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
+ 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
+ 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
+ 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
+ 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
+ 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
+ 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
+ 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
+ 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
+ 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
+ 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
+ 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
+ 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
+ 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
+ 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
+ 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
+ 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
+ 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
+ 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
+ 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
+ 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
+ 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
+ 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
+ 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
+ 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
+ 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
+ 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
+ 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
+ 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
+ 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
+ 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
+ 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
+ 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
+ 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
+ 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
+ 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
+ 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
+ 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
+ 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
+ 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
+ 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
+ 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
+ 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
+ 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
+ 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
+ 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
+ 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
+ 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
+ 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
+ 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
+ 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
+ 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
+ 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
+ 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
+ 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
+ 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
+ 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
+ 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
+ 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
+ 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
+ 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
+ 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
+ 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
+ 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
+ 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
+ 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
+ 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
+ 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
+ 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
+ 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
+ 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
+ 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
+ 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
+ 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
+ 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
+ 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
+ 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
+ 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
+ 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
+ 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
+ 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
+ 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
+ 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
+ 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
+ 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
+ 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
+ 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
+ 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
+ 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
+ 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
+ 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
+ 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
+ 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
+ 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
+ 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
+ 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
+ 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
+ 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
+ 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
+ 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
+ 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
+ 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
+ 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
+ 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
+ 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
+ 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
+ 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
+ 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
+ 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
+ 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
+ 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
+ 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
+ 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
+ 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
+ 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
+ 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
+ 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
+ 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
+ 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
+ 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
+ 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
+ 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
+ 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
+ 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
+ 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
+ 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
+ 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
+ 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
+ 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
+ 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
+ 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
+ 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
+ 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
+ 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
+ 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
+ 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
+ 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
+ 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
+ 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
+ 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
+ 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
+ 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
+ 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
+ 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
+ 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
+ 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
+ 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
+ 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
+ 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
+ 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
+ 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
+ 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
+ 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
+ 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
+ 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
+ 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
+ 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
+ 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
+ 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
+ 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
+ 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
+ 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
+ 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
+ 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
+ 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
+ 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
+ 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
+ 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
+ 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
+ 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
+ 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
+ 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
+ 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
+ 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
+ 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
+ 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
+ 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
+ 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
+ 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
+ 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
+ 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
+ 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
+ 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
+ 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
+ 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
+ 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
+ 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
+ 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
+ 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
+ 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
+ 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
+ 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
+ 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
+ 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
+ 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
+ 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
+ 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
+ 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
+ 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
+ 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
+ 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
+ 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
+ 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
+ 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
+ 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
+ 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
+ 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
+ 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
+ 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
+ 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
+ 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
+ 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
+ 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
+ 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
+ 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
+ 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
+ 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
+ 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
+ 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
+ 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
+ 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
+ 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
+ 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
+ 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
+ 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
+ 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
+ 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
+ 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
+ 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
+ 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
+ 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
+ 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
+ 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
+ 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
+ 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
+ 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
+ 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
+ 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
+ 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
+ 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
+ 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
+ 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
+ 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
+ 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
+ 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
+ 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
+ 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
+ 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
+ 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
+ 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
+ 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
+ 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
+ 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
+ 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
+ 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
+ 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
+ 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
+ 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
+ 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
+ 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
+ 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
+ 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
+ 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
+ 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
+ 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
+ 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
+ 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
+ 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
+ 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
+ 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
+ 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
+ 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
+ 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
+ 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
+ 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS
+ 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
+ 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 115726126U, // <1,u,3,2>: Cost 1 vrev LHS
+ 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
+ 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
+ 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
+ 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS
+ 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
+ 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
+ 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4>
+ 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
+ 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
+ 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
+ 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
+ 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
+ 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
+ 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
+ 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
+ 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
+ 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
+ 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
+ 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
+ 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
+ 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
+ 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
+ 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
+ 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
+ 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
+ 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
+ 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
+ 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
+ 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
+ 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
+ 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
+ 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
+ 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
+ 115767091U, // <1,u,u,2>: Cost 1 vrev LHS
+ 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
+ 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
+ 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
+ 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS
+ 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
+ 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
+ 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
+ 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
+ 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
+ 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
+ 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
+ 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
+ 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
+ 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
+ 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
+ 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
+ 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
+ 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
+ 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
+ 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
+ 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
+ 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
+ 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
+ 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
+ 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
+ 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
+ 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
+ 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
+ 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
+ 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
+ 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
+ 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
+ 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
+ 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
+ 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
+ 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
+ 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
+ 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
+ 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
+ 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
+ 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
+ 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
+ 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
+ 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
+ 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
+ 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
+ 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
+ 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
+ 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
+ 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
+ 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
+ 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
+ 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
+ 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
+ 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
+ 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
+ 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
+ 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
+ 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
+ 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
+ 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
+ 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
+ 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
+ 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
+ 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
+ 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
+ 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
+ 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
+ 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
+ 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
+ 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
+ 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
+ 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
+ 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
+ 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
+ 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
+ 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
+ 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
+ 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
+ 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
+ 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
+ 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
+ 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
+ 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
+ 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
+ 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
+ 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
+ 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
+ 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
+ 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
+ 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
+ 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
+ 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
+ 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
+ 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
+ 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
+ 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
+ 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
+ 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
+ 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
+ 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
+ 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
+ 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
+ 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
+ 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
+ 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
+ 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
+ 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
+ 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
+ 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
+ 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
+ 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
+ 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
+ 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
+ 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
+ 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
+ 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
+ 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
+ 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
+ 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
+ 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
+ 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
+ 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
+ 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
+ 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
+ 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
+ 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
+ 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
+ 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
+ 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
+ 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
+ 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
+ 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
+ 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
+ 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
+ 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
+ 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
+ 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
+ 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
+ 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
+ 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
+ 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
+ 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
+ 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
+ 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
+ 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
+ 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
+ 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
+ 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
+ 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
+ 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
+ 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
+ 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
+ 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
+ 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
+ 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
+ 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
+ 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
+ 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
+ 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
+ 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
+ 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
+ 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
+ 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
+ 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
+ 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS
+ 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
+ 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
+ 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
+ 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS
+ 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
+ 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
+ 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
+ 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
+ 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
+ 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
+ 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
+ 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
+ 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
+ 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
+ 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
+ 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
+ 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
+ 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
+ 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
+ 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
+ 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
+ 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
+ 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
+ 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
+ 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
+ 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
+ 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
+ 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
+ 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
+ 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
+ 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
+ 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
+ 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
+ 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7>
+ 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
+ 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
+ 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
+ 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS
+ 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS
+ 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS
+ 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
+ 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
+ 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
+ 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
+ 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
+ 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
+ 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
+ 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
+ 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
+ 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
+ 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
+ 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
+ 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
+ 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3>
+ 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
+ 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
+ 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
+ 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
+ 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
+ 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
+ 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
+ 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
+ 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
+ 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
+ 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
+ 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
+ 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
+ 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
+ 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
+ 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
+ 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
+ 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
+ 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
+ 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
+ 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
+ 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
+ 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
+ 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
+ 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
+ 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
+ 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
+ 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
+ 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
+ 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
+ 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
+ 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
+ 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
+ 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
+ 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
+ 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
+ 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
+ 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
+ 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
+ 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
+ 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
+ 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
+ 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
+ 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
+ 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
+ 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
+ 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
+ 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
+ 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
+ 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
+ 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
+ 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
+ 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
+ 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
+ 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
+ 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
+ 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
+ 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
+ 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
+ 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
+ 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
+ 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
+ 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
+ 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
+ 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
+ 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
+ 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
+ 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
+ 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
+ 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
+ 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
+ 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
+ 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
+ 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
+ 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
+ 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
+ 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
+ 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
+ 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
+ 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
+ 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
+ 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
+ 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
+ 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
+ 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
+ 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
+ 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
+ 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
+ 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
+ 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
+ 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
+ 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
+ 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
+ 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
+ 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
+ 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
+ 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
+ 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
+ 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
+ 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
+ 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
+ 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
+ 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
+ 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
+ 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
+ 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
+ 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
+ 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
+ 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
+ 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
+ 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
+ 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
+ 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
+ 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
+ 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
+ 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
+ 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
+ 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
+ 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
+ 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
+ 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
+ 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
+ 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
+ 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
+ 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
+ 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
+ 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
+ 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
+ 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
+ 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
+ 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
+ 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
+ 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
+ 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
+ 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
+ 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
+ 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
+ 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
+ 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
+ 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
+ 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
+ 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
+ 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
+ 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
+ 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
+ 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
+ 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
+ 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
+ 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
+ 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
+ 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
+ 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
+ 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
+ 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
+ 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
+ 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
+ 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
+ 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
+ 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
+ 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
+ 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
+ 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
+ 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
+ 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
+ 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
+ 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
+ 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
+ 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
+ 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
+ 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
+ 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
+ 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
+ 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
+ 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
+ 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
+ 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
+ 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
+ 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
+ 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
+ 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
+ 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
+ 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
+ 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
+ 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
+ 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
+ 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
+ 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
+ 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
+ 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
+ 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
+ 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
+ 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
+ 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
+ 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
+ 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
+ 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
+ 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
+ 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
+ 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
+ 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
+ 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
+ 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
+ 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
+ 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS
+ 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS
+ 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
+ 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
+ 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
+ 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
+ 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
+ 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
+ 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
+ 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
+ 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
+ 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
+ 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
+ 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
+ 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
+ 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
+ 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
+ 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
+ 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
+ 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
+ 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2>
+ 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
+ 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
+ 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
+ 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
+ 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
+ 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
+ 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
+ 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
+ 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
+ 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
+ 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
+ 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
+ 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
+ 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
+ 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
+ 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
+ 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
+ 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
+ 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
+ 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
+ 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
+ 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
+ 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
+ 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
+ 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
+ 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
+ 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
+ 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
+ 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
+ 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
+ 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
+ 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
+ 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
+ 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
+ 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
+ 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
+ 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
+ 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
+ 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
+ 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
+ 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
+ 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
+ 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
+ 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
+ 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
+ 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
+ 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
+ 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
+ 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
+ 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
+ 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
+ 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
+ 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
+ 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
+ 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
+ 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
+ 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
+ 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
+ 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS
+ 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
+ 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS
+ 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
+ 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
+ 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS
+ 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
+ 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
+ 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS
+ 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS
+ 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
+ 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
+ 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
+ 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4>
+ 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
+ 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS
+ 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
+ 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
+ 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
+ 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
+ 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
+ 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
+ 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
+ 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
+ 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
+ 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
+ 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
+ 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
+ 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS
+ 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS
+ 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS
+ 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
+ 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS
+ 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS
+ 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS
+ 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
+ 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
+ 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
+ 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
+ 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
+ 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
+ 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
+ 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
+ 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
+ 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
+ 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
+ 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
+ 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
+ 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
+ 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
+ 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
+ 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
+ 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
+ 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
+ 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
+ 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
+ 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
+ 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
+ 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
+ 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
+ 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
+ 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
+ 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
+ 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
+ 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
+ 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
+ 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
+ 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
+ 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
+ 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
+ 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
+ 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
+ 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
+ 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
+ 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
+ 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
+ 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
+ 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
+ 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
+ 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
+ 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
+ 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
+ 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
+ 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
+ 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
+ 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
+ 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
+ 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
+ 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
+ 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
+ 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
+ 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
+ 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
+ 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS
+ 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
+ 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
+ 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
+ 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
+ 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
+ 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
+ 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
+ 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
+ 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
+ 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
+ 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
+ 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
+ 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
+ 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
+ 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
+ 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
+ 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
+ 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
+ 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
+ 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
+ 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
+ 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
+ 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
+ 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
+ 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
+ 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
+ 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
+ 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
+ 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
+ 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
+ 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
+ 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
+ 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
+ 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
+ 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
+ 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
+ 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
+ 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
+ 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
+ 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
+ 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
+ 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
+ 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
+ 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
+ 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
+ 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
+ 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
+ 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
+ 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
+ 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
+ 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7>
+ 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7>
+ 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
+ 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
+ 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
+ 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
+ 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
+ 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
+ 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
+ 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
+ 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
+ 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
+ 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0>
+ 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
+ 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
+ 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
+ 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
+ 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
+ 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
+ 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
+ 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
+ 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
+ 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
+ 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
+ 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
+ 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
+ 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
+ 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
+ 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
+ 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
+ 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
+ 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
+ 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
+ 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
+ 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
+ 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
+ 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
+ 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
+ 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
+ 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
+ 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4>
+ 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
+ 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
+ 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
+ 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
+ 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
+ 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
+ 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
+ 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
+ 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
+ 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
+ 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
+ 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
+ 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
+ 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
+ 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
+ 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
+ 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7>
+ 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7>
+ 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
+ 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
+ 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
+ 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
+ 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
+ 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
+ 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
+ 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
+ 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
+ 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
+ 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
+ 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
+ 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
+ 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
+ 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
+ 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
+ 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
+ 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
+ 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
+ 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
+ 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
+ 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
+ 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
+ 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
+ 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
+ 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
+ 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
+ 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
+ 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
+ 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
+ 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
+ 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
+ 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
+ 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
+ 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
+ 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
+ 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
+ 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
+ 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
+ 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS
+ 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
+ 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
+ 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
+ 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
+ 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
+ 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
+ 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
+ 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
+ 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
+ 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
+ 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
+ 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
+ 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
+ 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
+ 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
+ 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
+ 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
+ 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
+ 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
+ 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
+ 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
+ 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
+ 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
+ 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
+ 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
+ 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
+ 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
+ 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
+ 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
+ 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
+ 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
+ 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
+ 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
+ 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
+ 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
+ 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
+ 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
+ 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
+ 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS
+ 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
+ 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
+ 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
+ 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
+ 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
+ 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
+ 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
+ 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
+ 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
+ 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
+ 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
+ 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
+ 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
+ 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
+ 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
+ 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
+ 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
+ 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
+ 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
+ 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
+ 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
+ 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
+ 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
+ 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
+ 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
+ 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
+ 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
+ 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
+ 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
+ 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
+ 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
+ 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
+ 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
+ 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
+ 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
+ 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
+ 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
+ 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
+ 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
+ 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
+ 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
+ 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
+ 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
+ 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
+ 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS
+ 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
+ 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
+ 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
+ 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
+ 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
+ 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
+ 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
+ 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
+ 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
+ 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
+ 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
+ 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
+ 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
+ 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
+ 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
+ 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
+ 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
+ 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
+ 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
+ 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
+ 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
+ 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS
+ 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
+ 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
+ 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
+ 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
+ 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
+ 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0>
+ 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
+ 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
+ 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
+ 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
+ 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
+ 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
+ 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
+ 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
+ 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
+ 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
+ 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
+ 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
+ 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
+ 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
+ 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
+ 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
+ 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
+ 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
+ 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
+ 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
+ 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
+ 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
+ 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
+ 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
+ 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
+ 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
+ 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
+ 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
+ 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
+ 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
+ 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
+ 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
+ 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
+ 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
+ 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
+ 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
+ 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
+ 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
+ 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
+ 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
+ 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
+ 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
+ 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
+ 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
+ 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
+ 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
+ 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
+ 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
+ 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
+ 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
+ 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
+ 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
+ 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
+ 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
+ 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
+ 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
+ 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
+ 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
+ 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
+ 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
+ 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
+ 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
+ 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
+ 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
+ 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
+ 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
+ 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
+ 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
+ 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
+ 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
+ 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
+ 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
+ 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
+ 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
+ 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
+ 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
+ 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
+ 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
+ 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
+ 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
+ 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
+ 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
+ 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
+ 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
+ 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
+ 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
+ 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
+ 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
+ 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
+ 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
+ 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
+ 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
+ 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
+ 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
+ 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
+ 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
+ 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
+ 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
+ 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
+ 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
+ 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
+ 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
+ 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
+ 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
+ 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
+ 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
+ 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
+ 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
+ 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
+ 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
+ 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
+ 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
+ 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
+ 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
+ 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
+ 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
+ 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
+ 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
+ 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
+ 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
+ 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
+ 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
+ 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
+ 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
+ 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
+ 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
+ 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
+ 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
+ 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
+ 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
+ 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
+ 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
+ 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
+ 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
+ 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
+ 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
+ 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
+ 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
+ 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
+ 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
+ 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
+ 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
+ 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
+ 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
+ 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
+ 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
+ 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
+ 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
+ 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
+ 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
+ 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
+ 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
+ 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
+ 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
+ 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
+ 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
+ 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
+ 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
+ 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
+ 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
+ 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
+ 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
+ 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
+ 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
+ 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
+ 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
+ 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
+ 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
+ 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
+ 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
+ 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
+ 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
+ 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
+ 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
+ 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
+ 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
+ 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
+ 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
+ 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
+ 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
+ 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
+ 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
+ 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
+ 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
+ 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
+ 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
+ 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
+ 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
+ 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
+ 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
+ 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
+ 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
+ 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
+ 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
+ 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
+ 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
+ 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
+ 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
+ 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
+ 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
+ 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
+ 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
+ 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
+ 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
+ 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
+ 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
+ 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
+ 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
+ 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
+ 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
+ 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
+ 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
+ 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
+ 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
+ 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
+ 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
+ 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
+ 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
+ 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
+ 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
+ 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
+ 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
+ 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
+ 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
+ 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
+ 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
+ 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
+ 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
+ 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
+ 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
+ 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
+ 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
+ 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
+ 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
+ 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
+ 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
+ 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
+ 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
+ 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
+ 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
+ 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
+ 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
+ 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
+ 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
+ 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
+ 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
+ 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
+ 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
+ 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
+ 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
+ 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
+ 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
+ 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
+ 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
+ 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
+ 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
+ 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
+ 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
+ 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
+ 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
+ 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
+ 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
+ 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
+ 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS
+ 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
+ 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
+ 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
+ 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
+ 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
+ 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
+ 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
+ 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
+ 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
+ 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
+ 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
+ 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
+ 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
+ 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
+ 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
+ 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
+ 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
+ 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
+ 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
+ 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
+ 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
+ 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
+ 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
+ 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
+ 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
+ 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
+ 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
+ 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
+ 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
+ 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
+ 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
+ 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
+ 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
+ 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
+ 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
+ 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
+ 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
+ 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
+ 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
+ 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
+ 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
+ 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
+ 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
+ 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
+ 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
+ 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
+ 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
+ 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
+ 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
+ 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS
+ 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
+ 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
+ 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
+ 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
+ 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
+ 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
+ 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
+ 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
+ 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
+ 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
+ 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
+ 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
+ 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
+ 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
+ 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
+ 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS
+ 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
+ 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
+ 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
+ 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
+ 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
+ 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
+ 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
+ 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
+ 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
+ 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
+ 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
+ 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
+ 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
+ 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
+ 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
+ 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
+ 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
+ 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
+ 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
+ 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
+ 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
+ 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
+ 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
+ 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
+ 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
+ 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
+ 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
+ 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
+ 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
+ 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
+ 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
+ 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
+ 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
+ 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
+ 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
+ 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
+ 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
+ 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
+ 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
+ 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
+ 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
+ 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
+ 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
+ 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
+ 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
+ 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
+ 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
+ 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
+ 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
+ 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
+ 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
+ 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
+ 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
+ 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
+ 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
+ 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
+ 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
+ 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
+ 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
+ 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
+ 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
+ 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
+ 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
+ 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
+ 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
+ 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
+ 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
+ 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
+ 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
+ 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
+ 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
+ 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
+ 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
+ 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
+ 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
+ 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
+ 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
+ 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
+ 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
+ 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
+ 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
+ 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
+ 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
+ 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
+ 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
+ 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
+ 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
+ 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
+ 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
+ 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
+ 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
+ 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
+ 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
+ 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
+ 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
+ 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
+ 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
+ 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
+ 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
+ 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
+ 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
+ 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
+ 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
+ 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
+ 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
+ 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
+ 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
+ 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
+ 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
+ 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
+ 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
+ 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
+ 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
+ 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
+ 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
+ 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
+ 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
+ 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
+ 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
+ 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
+ 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
+ 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
+ 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
+ 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
+ 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
+ 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
+ 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
+ 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
+ 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
+ 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
+ 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
+ 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
+ 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
+ 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
+ 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
+ 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
+ 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
+ 2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
+ 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
+ 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
+ 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
+ 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
+ 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
+ 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
+ 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
+ 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
+ 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
+ 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
+ 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
+ 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
+ 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
+ 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
+ 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
+ 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
+ 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
+ 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
+ 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
+ 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
+ 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
+ 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
+ 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
+ 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
+ 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
+ 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
+ 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
+ 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
+ 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
+ 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
+ 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
+ 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
+ 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
+ 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
+ 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
+ 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
+ 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
+ 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
+ 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
+ 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
+ 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
+ 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
+ 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
+ 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
+ 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
+ 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
+ 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
+ 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
+ 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
+ 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
+ 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
+ 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
+ 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
+ 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
+ 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
+ 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
+ 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
+ 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
+ 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
+ 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
+ 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
+ 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
+ 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
+ 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
+ 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
+ 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
+ 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
+ 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
+ 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
+ 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
+ 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
+ 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
+ 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
+ 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
+ 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
+ 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
+ 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
+ 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
+ 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
+ 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
+ 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
+ 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
+ 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
+ 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
+ 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
+ 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
+ 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
+ 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
+ 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
+ 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
+ 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
+ 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
+ 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
+ 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
+ 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
+ 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
+ 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
+ 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
+ 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
+ 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
+ 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
+ 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
+ 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
+ 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
+ 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
+ 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
+ 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
+ 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
+ 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
+ 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
+ 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
+ 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
+ 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
+ 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
+ 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
+ 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
+ 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
+ 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
+ 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
+ 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
+ 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
+ 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
+ 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS
+ 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
+ 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
+ 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
+ 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
+ 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
+ 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
+ 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
+ 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
+ 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
+ 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
+ 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
+ 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
+ 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
+ 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
+ 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
+ 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
+ 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
+ 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
+ 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
+ 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS
+ 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
+ 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS
+ 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
+ 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
+ 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
+ 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
+ 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
+ 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
+ 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
+ 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
+ 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
+ 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
+ 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
+ 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
+ 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
+ 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
+ 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
+ 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
+ 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
+ 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
+ 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
+ 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
+ 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
+ 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
+ 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
+ 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
+ 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
+ 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
+ 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
+ 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
+ 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
+ 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
+ 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
+ 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
+ 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
+ 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
+ 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
+ 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
+ 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
+ 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
+ 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
+ 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
+ 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
+ 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
+ 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
+ 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
+ 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
+ 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
+ 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
+ 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
+ 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
+ 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
+ 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
+ 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
+ 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
+ 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
+ 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
+ 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
+ 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
+ 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
+ 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
+ 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
+ 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
+ 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
+ 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
+ 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
+ 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
+ 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
+ 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
+ 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
+ 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
+ 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
+ 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
+ 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
+ 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
+ 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
+ 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
+ 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
+ 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
+ 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
+ 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
+ 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
+ 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
+ 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
+ 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
+ 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
+ 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
+ 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
+ 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
+ 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
+ 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
+ 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
+ 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
+ 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
+ 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
+ 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
+ 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
+ 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
+ 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
+ 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
+ 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
+ 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
+ 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
+ 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
+ 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
+ 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
+ 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
+ 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
+ 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
+ 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
+ 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
+ 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
+ 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
+ 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
+ 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
+ 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
+ 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
+ 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
+ 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
+ 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
+ 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
+ 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
+ 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
+ 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
+ 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
+ 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
+ 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
+ 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
+ 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
+ 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
+ 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
+ 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
+ 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
+ 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
+ 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
+ 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
+ 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
+ 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
+ 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
+ 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
+ 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
+ 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
+ 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
+ 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
+ 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
+ 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
+ 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
+ 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
+ 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
+ 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
+ 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
+ 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
+ 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
+ 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
+ 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
+ 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
+ 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
+ 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
+ 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
+ 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
+ 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
+ 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
+ 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
+ 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
+ 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
+ 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
+ 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
+ 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
+ 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
+ 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
+ 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
+ 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
+ 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
+ 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
+ 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
+ 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
+ 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
+ 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
+ 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
+ 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
+ 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
+ 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
+ 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
+ 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
+ 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
+ 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
+ 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
+ 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
+ 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
+ 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
+ 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
+ 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
+ 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
+ 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
+ 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
+ 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
+ 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
+ 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
+ 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
+ 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
+ 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
+ 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
+ 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
+ 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
+ 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
+ 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
+ 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
+ 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
+ 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
+ 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
+ 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
+ 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
+ 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
+ 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0>
+ 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
+ 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
+ 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
+ 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
+ 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
+ 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
+ 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
+ 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
+ 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
+ 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
+ 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
+ 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
+ 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
+ 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
+ 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
+ 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
+ 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
+ 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
+ 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
+ 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
+ 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
+ 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
+ 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
+ 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
+ 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
+ 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
+ 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
+ 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
+ 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
+ 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
+ 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
+ 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
+ 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
+ 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
+ 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
+ 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
+ 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
+ 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
+ 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
+ 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
+ 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
+ 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
+ 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
+ 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
+ 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
+ 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
+ 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
+ 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
+ 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
+ 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
+ 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
+ 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
+ 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
+ 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
+ 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
+ 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
+ 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
+ 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
+ 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
+ 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
+ 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
+ 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
+ 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
+ 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
+ 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
+ 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
+ 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
+ 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
+ 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
+ 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
+ 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
+ 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
+ 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
+ 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
+ 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
+ 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
+ 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
+ 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
+ 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
+ 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
+ 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
+ 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
+ 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
+ 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
+ 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
+ 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
+ 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
+ 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
+ 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
+ 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
+ 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
+ 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
+ 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
+ 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
+ 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
+ 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
+ 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
+ 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
+ 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
+ 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
+ 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
+ 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
+ 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
+ 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
+ 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
+ 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
+ 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
+ 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
+ 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
+ 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
+ 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
+ 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
+ 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
+ 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
+ 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
+ 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
+ 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
+ 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+ 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
+ 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
+ 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
+ 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+ 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
+ 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
+ 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+ 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
+ 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
+ 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
+ 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+ 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
+ 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+ 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
+ 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+ 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+ 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
+ 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
+ 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
+ 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
+ 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
+ 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
+ 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
+ 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
+ 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
+ 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
+ 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
+ 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
+ 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
+ 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+ 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
+ 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
+ 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+ 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+ 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
+ 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
+ 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
+ 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+ 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+ 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+ 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
+ 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+ 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
+ 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
+ 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+ 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+ 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
+ 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
+ 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
+ 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+ 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
+ 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+ 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
+ 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
+ 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
+ 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
+ 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
+ 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
+ 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
+ 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
+ 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+ 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
+ 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
+ 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
+ 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
+ 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
+ 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
+ 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
+ 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
+ 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
+ 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
+ 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
+ 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
+ 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
+ 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
+ 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
+ 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
+ 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
+ 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
+ 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
+ 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
+ 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
+ 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
+ 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
+ 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
+ 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
+ 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
+ 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
+ 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
+ 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
+ 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
+ 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
+ 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
+ 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
+ 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
+ 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
+ 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
+ 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+ 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
+ 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
+ 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
+ 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
+ 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
+ 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+ 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+ 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
+ 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+ 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
+ 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
+ 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
+ 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+ 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
+ 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
+ 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+ 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+ 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
+ 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
+ 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
+ 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
+ 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
+ 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
+ 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
+ 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
+ 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
+ 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
+ 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
+ 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
+ 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
+ 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
+ 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
+ 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
+ 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+ 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
+ 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
+ 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
+ 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
+ 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
+ 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
+ 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
+ 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
+ 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
+ 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
+ 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
+ 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
+ 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
+ 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
+ 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+ 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
+ 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
+ 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
+ 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
+ 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
+ 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
+ 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
+ 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+ 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
+ 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
+ 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+ 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
+ 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
+ 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
+ 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+ 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
+ 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
+ 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+ 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
+ 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
+ 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
+ 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
+ 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
+ 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
+ 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
+ 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+ 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+ 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
+ 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
+ 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
+ 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
+ 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+ 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
+ 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
+ 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+ 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+ 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
+ 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
+ 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
+ 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+ 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
+ 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+ 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+ 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+ 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
+ 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
+ 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+ 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
+ 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
+ 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+ 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
+ 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+ 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
+ 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
+ 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
+ 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+ 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
+ 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+ 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+ 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+ 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
+ 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
+ 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
+ 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
+ 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+ 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
+ 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
+ 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+ 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
+ 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
+ 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
+ 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+ 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+ 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
+ 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
+ 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+ 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
+ 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
+ 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
+ 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+ 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
+ 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
+ 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
+ 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
+ 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+ 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+ 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
+ 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
+ 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
+ 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
+ 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
+ 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
+ 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
+ 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
+ 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
+ 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
+ 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
+ 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
+ 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
+ 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
+ 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
+ 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
+ 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
+ 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
+ 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
+ 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
+ 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
+ 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
+ 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
+ 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
+ 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
+ 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+ 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
+ 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
+ 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
+ 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
+ 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
+ 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+ 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
+ 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
+ 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
+ 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
+ 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
+ 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+ 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
+ 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
+ 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+ 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+ 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
+ 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
+ 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
+ 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
+ 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+ 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
+ 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
+ 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
+ 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+ 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
+ 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
+ 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
+ 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
+ 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
+ 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
+ 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
+ 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
+ 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
+ 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
+ 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
+ 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
+ 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
+ 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
+ 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
+ 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
+ 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
+ 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
+ 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
+ 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
+ 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
+ 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
+ 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
+ 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
+ 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+ 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
+ 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
+ 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
+ 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+ 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
+ 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+ 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
+ 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+ 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
+ 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
+ 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
+ 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
+ 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
+ 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
+ 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
+ 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
+ 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
+ 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
+ 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+ 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
+ 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
+ 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+ 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
+ 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
+ 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
+ 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+ 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
+ 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
+ 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
+ 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
+ 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+ 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
+ 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
+ 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
+ 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+ 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
+ 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
+ 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+ 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
+ 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
+ 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
+ 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
+ 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
+ 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
+ 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
+ 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
+ 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
+ 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
+ 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
+ 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
+ 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
+ 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
+ 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
+ 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
+ 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
+ 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
+ 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
+ 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
+ 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
+ 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
+ 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
+ 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
+ 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
+ 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
+ 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
+ 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
+ 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
+ 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
+ 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
+ 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
+ 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+ 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
+ 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
+ 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
+ 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
+ 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
+ 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+ 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+ 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
+ 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
+ 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+ 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
+ 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
+ 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
+ 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
+ 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+ 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
+ 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
+ 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+ 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+ 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
+ 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
+ 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
+ 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
+ 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
+ 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+ 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+ 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
+ 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
+ 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
+ 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+ 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
+ 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
+ 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
+ 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+ 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
+ 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
+ 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
+ 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
+ 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+ 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
+ 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
+ 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
+ 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
+ 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
+ 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
+ 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
+ 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
+ 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
+ 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
+ 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
+ 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
+ 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
+ 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+ 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+ 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
+ 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+ 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
+ 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+ 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
+ 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
+ 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
+ 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+ 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
+ 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
+ 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
+ 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
+ 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
+ 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
+ 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
+ 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
+ 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
+ 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
+ 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
+ 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
+ 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
+ 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
+ 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
+ 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
+ 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
+ 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+ 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
+ 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
+ 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
+ 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+ 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
+ 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
+ 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
+ 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+ 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
+ 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
+ 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
+ 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
+ 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+ 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+ 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
+ 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
+ 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
+ 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
+ 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
+ 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
+ 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
+ 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
+ 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
+ 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
+ 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
+ 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
+ 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
+ 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
+ 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
+ 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
+ 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
+ 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
+ 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
+ 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
+ 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
+ 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+ 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
+ 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
+ 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
+ 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+ 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
+ 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
+ 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
+ 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
+ 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
+ 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
+ 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
+ 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
+ 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
+ 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
+ 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
+ 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
+ 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
+ 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
+ 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
+ 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
+ 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
+ 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
+ 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
+ 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
+ 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
+ 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
+ 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
+ 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
+ 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
+ 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
+ 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
+ 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
+ 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
+ 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
+ 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
+ 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
+ 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
+ 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
+ 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
+ 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+ 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
+ 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
+ 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
+ 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+ 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
+ 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
+ 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
+ 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
+ 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+ 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
+ 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+ 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
+ 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
+ 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
+ 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
+ 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
+ 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
+ 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
+ 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
+ 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
+ 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
+ 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
+ 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
+ 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+ 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
+ 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
+ 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
+ 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
+ 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+ 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
+ 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+ 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
+ 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
+ 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
+ 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
+ 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
+ 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
+ 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
+ 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
+ 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+ 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
+ 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
+ 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
+ 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
+ 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+ 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
+ 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
+ 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+ 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+ 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+ 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
+ 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
+ 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
+ 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
+ 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
+ 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+ 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
+ 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
+ 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
+ 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+ 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
+ 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+ 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
+ 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
+ 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+ 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
+ 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
+ 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
+ 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
+ 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+ 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+ 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+ 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
+ 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+ 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
+ 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
+ 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+ 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
+ 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
+ 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
+ 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
+ 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
+ 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
+ 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
+ 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
+ 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
+ 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
+ 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
+ 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
+ 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
+ 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
+ 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
+ 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
+ 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
+ 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
+ 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
+ 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+ 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
+ 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
+ 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
+ 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
+ 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
+ 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
+ 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+ 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
+ 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+ 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
+ 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
+ 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
+ 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
+ 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
+ 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+ 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
+ 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
+ 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
+ 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
+ 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
+ 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
+ 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+ 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
+ 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
+ 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
+ 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
+ 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
+ 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
+ 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
+ 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
+ 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
+ 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
+ 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
+ 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
+ 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
+ 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+ 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+ 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+ 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
+ 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+ 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+ 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+ 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
+ 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
+ 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
+ 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+ 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
+ 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
+ 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
+ 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
+ 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
+ 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+ 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
+ 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
+ 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
+ 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
+ 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
+ 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+ 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
+ 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+ 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
+ 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
+ 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
+ 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
+ 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
+ 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
+ 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
+ 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
+ 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
+ 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
+ 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
+ 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
+ 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+ 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
+ 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
+ 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
+ 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
+ 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
+ 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
+ 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
+ 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
+ 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
+ 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
+ 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
+ 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
+ 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
+ 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
+ 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
+ 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+ 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
+ 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+ 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
+ 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+ 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
+ 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+ 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
+ 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
+ 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+ 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
+ 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
+ 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
+ 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
+ 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
+ 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
+ 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
+ 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+ 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
+ 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+ 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
+ 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
+ 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
+ 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
+ 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+ 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
+ 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
+ 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
+ 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
+ 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
+ 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
+ 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
+ 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+ 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
+ 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+ 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
+ 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
+ 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+ 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
+ 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+ 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
+ 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
+ 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+ 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
+ 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
+ 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
+ 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+ 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
+ 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
+ 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
+ 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+ 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
+ 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
+ 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
+ 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
+ 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
+ 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
+ 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
+ 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
+ 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
+ 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
+ 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
+ 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
+ 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
+ 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
+ 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
+ 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
+ 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
+ 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
+ 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
+ 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+ 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
+ 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+ 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
+ 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
+ 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
+ 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
+ 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
+ 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
+ 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
+ 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
+ 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
+ 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
+ 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
+ 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
+ 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
+ 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
+ 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
+ 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
+ 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
+ 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
+ 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
+ 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
+ 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+ 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
+ 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+ 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
+ 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
+ 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
+ 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
+ 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+ 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
+ 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+ 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+ 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+ 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+ 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
+ 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
+ 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+ 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
+ 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
+ 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
+ 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
+ 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+ 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
+ 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
+ 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
+ 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
+ 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
+ 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
+ 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
+ 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
+ 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
+ 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
+ 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
+ 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
+ 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
+ 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+ 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+ 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+ 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
+ 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+ 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
+ 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+ 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+ 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+ 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
+ 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+ 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
+ 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+ 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+ 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
+ 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+ 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
+ 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
+ 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
+ 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+ 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+ 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
+ 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
+ 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+ 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
+ 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
+ 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
+ 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
+ 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
+ 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
+ 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
+ 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
+ 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
+ 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
+ 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
+ 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
+ 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
+ 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
+ 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
+ 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
+ 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
+ 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
+ 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
+ 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
+ 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
+ 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
+ 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
+ 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
+ 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
+ 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
+ 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
+ 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
+ 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
+ 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
+ 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
+ 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
+ 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
+ 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
+ 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
+ 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
+ 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
+ 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
+ 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
+ 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
+ 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
+ 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
+ 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
+ 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
+ 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
+ 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
+ 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
+ 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
+ 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
+ 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
+ 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
+ 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
+ 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
+ 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
+ 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
+ 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
+ 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
+ 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
+ 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
+ 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
+ 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
+ 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
+ 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
+ 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
+ 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
+ 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
+ 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
+ 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
+ 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
+ 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
+ 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
+ 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
+ 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
+ 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
+ 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
+ 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
+ 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
+ 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
+ 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
+ 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
+ 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
+ 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
+ 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
+ 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
+ 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
+ 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
+ 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
+ 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
+ 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
+ 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
+ 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
+ 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
+ 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
+ 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
+ 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
+ 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
+ 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
+ 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
+ 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
+ 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
+ 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
+ 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
+ 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
+ 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
+ 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
+ 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
+ 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
+ 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
+ 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
+ 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
+ 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
+ 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
+ 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
+ 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
+ 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
+ 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
+ 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
+ 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
+ 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
+ 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
+ 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
+ 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
+ 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
+ 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
+ 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
+ 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
+ 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
+ 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
+ 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
+ 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
+ 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
+ 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
+ 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
+ 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
+ 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
+ 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
+ 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
+ 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
+ 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
+ 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
+ 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
+ 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
+ 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
+ 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
+ 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
+ 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
+ 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
+ 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
+ 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
+ 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
+ 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
+ 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
+ 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
+ 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
+ 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
+ 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
+ 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
+ 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
+ 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
+ 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
+ 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
+ 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
+ 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
+ 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
+ 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
+ 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
+ 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
+ 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
+ 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
+ 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
+ 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
+ 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
+ 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
+ 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
+ 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
+ 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
+ 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
+ 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
+ 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
+ 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
+ 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
+ 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
+ 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
+ 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
+ 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
+ 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
+ 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
+ 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
+ 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
+ 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
+ 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
+ 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
+ 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS
+ 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS
+ 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
+ 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
+ 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS
+ 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS
+ 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
+ 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
+ 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
+ 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
+ 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
+ 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
+ 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
+ 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
+ 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
+ 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
+ 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
+ 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
+ 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
+ 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
+ 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
+ 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
+ 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
+ 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
+ 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
+ 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
+ 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
+ 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
+ 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
+ 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
+ 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
+ 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
+ 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
+ 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
+ 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
+ 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
+ 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
+ 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0>
+ 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
+ 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
+ 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
+ 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
+ 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
+ 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
+ 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
+ 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
+ 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
+ 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
+ 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
+ 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
+ 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
+ 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
+ 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
+ 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
+ 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
+ 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
+ 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
+ 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
+ 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
+ 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
+ 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
+ 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS
+ 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
+ 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
+ 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
+ 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS
+ 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
+ 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
+ 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
+ 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS
+ 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
+ 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
+ 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
+ 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
+ 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
+ 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
+ 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS
+ 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
+ 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
+ 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
+ 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
+ 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
+ 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
+ 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
+ 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
+ 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
+ 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
+ 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
+ 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
+ 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
+ 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
+ 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
+ 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
+ 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
+ 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
+ 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
+ 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
+ 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
+ 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
+ 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
+ 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
+ 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
+ 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
+ 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
+ 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
+ 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
+ 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
+ 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
+ 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
+ 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
+ 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
+ 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
+ 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
+ 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
+ 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
+ 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
+ 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
+ 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
+ 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
+ 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
+ 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
+ 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
+ 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
+ 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
+ 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
+ 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
+ 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
+ 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
+ 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
+ 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
+ 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
+ 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
+ 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
+ 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
+ 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
+ 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
+ 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
+ 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
+ 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
+ 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
+ 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
+ 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
+ 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
+ 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
+ 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
+ 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
+ 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
+ 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
+ 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
+ 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
+ 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
+ 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
+ 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
+ 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
+ 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
+ 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
+ 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
+ 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
+ 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
+ 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
+ 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
+ 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
+ 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
+ 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
+ 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
+ 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
+ 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
+ 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
+ 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
+ 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
+ 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
+ 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
+ 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
+ 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
+ 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
+ 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
+ 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
+ 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
+ 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
+ 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
+ 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
+ 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
+ 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
+ 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
+ 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
+ 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
+ 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
+ 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
+ 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
+ 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
+ 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
+ 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
+ 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
+ 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
+ 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
+ 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
+ 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
+ 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
+ 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
+ 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
+ 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
+ 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
+ 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
+ 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
+ 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
+ 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
+ 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
+ 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
+ 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
+ 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
+ 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
+ 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
+ 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
+ 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
+ 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
+ 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
+ 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
+ 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
+ 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
+ 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
+ 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
+ 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
+ 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
+ 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
+ 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
+ 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
+ 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
+ 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
+ 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
+ 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
+ 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
+ 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
+ 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
+ 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
+ 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
+ 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
+ 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
+ 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
+ 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
+ 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
+ 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
+ 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
+ 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
+ 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
+ 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
+ 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
+ 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
+ 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
+ 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
+ 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
+ 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
+ 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
+ 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
+ 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
+ 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
+ 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
+ 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
+ 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
+ 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
+ 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
+ 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
+ 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
+ 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
+ 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
+ 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
+ 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
+ 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
+ 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
+ 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
+ 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
+ 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
+ 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
+ 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
+ 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
+ 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
+ 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
+ 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
+ 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
+ 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
+ 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
+ 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
+ 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
+ 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
+ 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
+ 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
+ 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
+ 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
+ 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
+ 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
+ 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
+ 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
+ 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
+ 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
+ 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
+ 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
+ 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
+ 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
+ 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
+ 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
+ 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
+ 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
+ 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
+ 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
+ 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
+ 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
+ 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
+ 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
+ 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
+ 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
+ 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
+ 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
+ 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
+ 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
+ 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
+ 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
+ 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
+ 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
+ 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
+ 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
+ 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
+ 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
+ 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
+ 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
+ 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
+ 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
+ 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
+ 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
+ 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
+ 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
+ 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
+ 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
+ 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
+ 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
+ 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
+ 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
+ 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
+ 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
+ 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
+ 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
+ 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
+ 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
+ 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
+ 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
+ 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
+ 2602819686U, // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
+ 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
+ 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
+ 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
+ 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
+ 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
+ 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
+ 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
+ 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
+ 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
+ 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
+ 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
+ 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
+ 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
+ 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
+ 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
+ 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
+ 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
+ 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
+ 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
+ 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
+ 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
+ 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
+ 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
+ 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
+ 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
+ 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
+ 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
+ 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
+ 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
+ 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
+ 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
+ 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
+ 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
+ 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
+ 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
+ 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
+ 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
+ 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
+ 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
+ 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
+ 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
+ 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
+ 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
+ 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
+ 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
+ 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
+ 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
+ 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
+ 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
+ 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
+ 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
+ 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
+ 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
+ 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
+ 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
+ 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
+ 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
+ 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
+ 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
+ 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
+ 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
+ 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
+ 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
+ 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
+ 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
+ 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
+ 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
+ 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
+ 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
+ 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
+ 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
+ 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
+ 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
+ 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
+ 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
+ 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
+ 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
+ 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
+ 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
+ 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
+ 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
+ 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
+ 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
+ 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
+ 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
+ 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
+ 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
+ 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
+ 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
+ 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
+ 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
+ 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
+ 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
+ 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
+ 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
+ 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
+ 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
+ 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
+ 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
+ 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
+ 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
+ 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
+ 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
+ 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
+ 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
+ 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
+ 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
+ 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
+ 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
+ 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
+ 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
+ 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
+ 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
+ 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
+ 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
+ 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
+ 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
+ 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
+ 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
+ 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
+ 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
+ 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
+ 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
+ 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
+ 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
+ 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
+ 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
+ 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
+ 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
+ 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
+ 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
+ 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
+ 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
+ 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
+ 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
+ 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
+ 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
+ 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
+ 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
+ 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
+ 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
+ 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
+ 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
+ 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
+ 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
+ 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
+ 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
+ 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
+ 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
+ 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
+ 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
+ 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
+ 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
+ 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
+ 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
+ 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
+ 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
+ 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
+ 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
+ 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
+ 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
+ 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
+ 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
+ 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
+ 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
+ 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
+ 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
+ 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
+ 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
+ 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
+ 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
+ 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
+ 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
+ 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
+ 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
+ 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
+ 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
+ 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
+ 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
+ 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
+ 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
+ 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
+ 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
+ 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
+ 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
+ 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
+ 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
+ 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
+ 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
+ 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
+ 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
+ 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
+ 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
+ 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
+ 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
+ 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
+ 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
+ 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
+ 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
+ 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
+ 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
+ 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
+ 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
+ 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
+ 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
+ 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
+ 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
+ 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
+ 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
+ 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
+ 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
+ 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
+ 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
+ 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
+ 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
+ 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
+ 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
+ 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
+ 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
+ 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
+ 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
+ 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
+ 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
+ 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
+ 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
+ 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
+ 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
+ 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
+ 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
+ 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
+ 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
+ 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
+ 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
+ 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
+ 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
+ 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
+ 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
+ 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
+ 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
+ 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
+ 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
+ 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
+ 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
+ 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
+ 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
+ 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
+ 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
+ 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
+ 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
+ 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
+ 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
+ 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
+ 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
+ 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
+ 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
+ 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
+ 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
+ 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
+ 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
+ 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
+ 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
+ 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
+ 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
+ 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
+ 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
+ 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
+ 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
+ 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
+ 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
+ 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
+ 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
+ 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
+ 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
+ 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
+ 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
+ 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
+ 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
+ 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
+ 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
+ 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
+ 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
+ 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
+ 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
+ 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
+ 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
+ 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
+ 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
+ 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
+ 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
+ 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
+ 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
+ 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
+ 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
+ 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
+ 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
+ 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
+ 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
+ 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
+ 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
+ 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
+ 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
+ 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
+ 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
+ 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
+ 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
+ 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
+ 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
+ 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
+ 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
+ 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
+ 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS
+ 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
+ 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
+ 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
+ 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
+ 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
+ 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS
+ 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
+ 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
+ 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
+ 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
+ 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
+ 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
+ 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
+ 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
+ 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS
+ 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
+ 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
+ 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
+ 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
+ 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
+ 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
+ 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
+ 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
+ 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
+ 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
+ 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
+ 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
+ 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
+ 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
+ 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
+ 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
+ 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
+ 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
+ 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
+ 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
+ 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
+ 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
+ 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
+ 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
+ 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
+ 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
+ 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
+ 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
+ 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
+ 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS
+ 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
+ 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS
+ 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
+ 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
+ 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
+ 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
+ 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
+ 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
+ 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
+ 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
+ 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
+ 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
+ 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
+ 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
+ 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
+ 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
+ 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
+ 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS
+ 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
+ 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
+ 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS
+ 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
+ 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
+ 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
+ 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS
+ 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS
+ 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS
+ 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS
+ 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
+ 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
+ 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
+ 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS
+ 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
+ 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS
+ 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
+ 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
+ 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
+ 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
+ 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
+ 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
+ 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 72589981U, // <u,0,3,2>: Cost 1 vrev LHS
+ 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
+ 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
+ 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
+ 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
+ 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
+ 73032403U, // <u,0,3,u>: Cost 1 vrev LHS
+ 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
+ 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
+ 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
+ 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
+ 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
+ 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
+ 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5>
+ 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
+ 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
+ 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
+ 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
+ 2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
+ 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
+ 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
+ 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
+ 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
+ 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
+ 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
+ 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
+ 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
+ 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
+ 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
+ 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
+ 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
+ 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
+ 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS
+ 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
+ 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS
+ 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
+ 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
+ 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
+ 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
+ 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
+ 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
+ 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
+ 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS
+ 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
+ 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
+ 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
+ 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS
+ 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
+ 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
+ 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
+ 1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
+ 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
+ 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
+ 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
+ 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
+ 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
+ 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4>
+ 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
+ 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
+ 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
+ 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
+ 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4>
+ 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
+ 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
+ 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
+ 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
+ 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
+ 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
+ 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
+ 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
+ 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
+ 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
+ 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
+ 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
+ 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
+ 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
+ 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
+ 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
+ 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7>
+ 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
+ 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
+ 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
+ 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS
+ 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
+ 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
+ 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
+ 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
+ 2712069606U, // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
+ 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
+ 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
+ 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
+ 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
+ 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
+ 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
+ 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1>
+ 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
+ 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
+ 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS
+ 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
+ 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
+ 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS
+ 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS
+ 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS
+ 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
+ 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
+ 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
+ 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
+ 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
+ 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
+ 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
+ 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
+ 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
+ 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
+ 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5>
+ 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
+ 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
+ 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
+ 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
+ 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
+ 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
+ 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
+ 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
+ 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
+ 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
+ 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
+ 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS
+ 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS
+ 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS
+ 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
+ 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0>
+ 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0>
+ 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
+ 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
+ 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
+ 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
+ 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
+ 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
+ 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
+ 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS
+ 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3>
+ 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS
+ 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
+ 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
+ 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
+ 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
+ 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4>
+ 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
+ 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
+ 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
+ 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
+ 1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
+ 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
+ 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
+ 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
+ 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6>
+ 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
+ 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
+ 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
+ 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
+ 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
+ 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
+ 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS
+ 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
+ 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
+ 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
+ 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
+ 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
+ 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
+ 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
+ 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
+ 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS
+ 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
+ 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
+ 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
+ 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
+ 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
+ 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
+ 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
+ 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
+ 2567710817U, // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
+ 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
+ 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
+ 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
+ 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
+ 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
+ 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
+ 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS
+ 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS
+ 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
+ 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
+ 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
+ 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
+ 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS
+ 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS
+ 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
+ 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
+ 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
+ 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
+ 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
+ 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
+ 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
+ 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
+ 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
+ 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 96808489U, // <u,4,7,6>: Cost 1 vrev RHS
+ 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
+ 96955963U, // <u,4,7,u>: Cost 1 vrev RHS
+ 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
+ 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
+ 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
+ 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS
+ 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS
+ 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
+ 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
+ 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
+ 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
+ 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
+ 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0>
+ 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
+ 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
+ 2624512887U, // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
+ 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
+ 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
+ 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
+ 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
+ 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
+ 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
+ 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
+ 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
+ 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
+ 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
+ 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
+ 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
+ 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
+ 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
+ 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
+ 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
+ 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
+ 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
+ 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
+ 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
+ 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
+ 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
+ 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
+ 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
+ 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
+ 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
+ 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
+ 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
+ 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
+ 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
+ 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
+ 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS
+ 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS
+ 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
+ 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
+ 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
+ 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
+ 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
+ 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
+ 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
+ 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
+ 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
+ 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
+ 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
+ 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
+ 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
+ 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
+ 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS
+ 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
+ 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
+ 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0>
+ 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
+ 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
+ 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0>
+ 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
+ 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
+ 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
+ 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
+ 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
+ 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
+ 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
+ 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
+ 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1>
+ 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
+ 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
+ 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
+ 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
+ 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
+ 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
+ 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
+ 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
+ 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3>
+ 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3>
+ 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
+ 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
+ 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
+ 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
+ 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
+ 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
+ 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
+ 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
+ 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
+ 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
+ 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
+ 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
+ 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
+ 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
+ 2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
+ 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
+ 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
+ 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
+ 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
+ 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
+ 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
+ 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
+ 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
+ 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
+ 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS
+ 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS
+ 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS
+ 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS
+ 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS
+ 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS
+ 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS
+ 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
+ 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
+ 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
+ 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
+ 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
+ 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
+ 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
+ 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
+ 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
+ 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
+ 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
+ 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
+ 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
+ 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
+ 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
+ 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
+ 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
+ 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
+ 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
+ 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
+ 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
+ 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
+ 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
+ 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
+ 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
+ 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
+ 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
+ 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
+ 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
+ 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
+ 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
+ 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
+ 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
+ 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS
+ 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS
+ 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS
+ 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS
+ 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
+ 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0>
+ 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS
+ 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
+ 1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
+ 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
+ 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2>
+ 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
+ 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 120371557U, // <u,u,3,2>: Cost 1 vrev LHS
+ 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS
+ 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS
+ 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS
+ 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
+ 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
+ 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4>
+ 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS
+ 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS
+ 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
+ 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
+ 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
+ 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS
+ 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS
+ 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
+ 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
+ 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
+ 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6>
+ 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
+ 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
+ 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
+ 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 120699277U, // <u,u,7,6>: Cost 1 vrev RHS
+ 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
+ 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
+ 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+ 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+ 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+ 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+ 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
new file mode 100644
index 0000000..e6e8cdf
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -0,0 +1,19 @@
+//===-- ARMRegisterInfo.cpp - ARM Register Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMRegisterInfo.h"
+using namespace llvm;
+
+void ARMRegisterInfo::anchor() { }
+
+ARMRegisterInfo::ARMRegisterInfo() : ARMBaseRegisterInfo() {}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
new file mode 100644
index 0000000..e2e650e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
@@ -0,0 +1,31 @@
+//===-- ARMRegisterInfo.h - ARM Register Information Impl -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMREGISTERINFO_H
+
+#include "ARMBaseRegisterInfo.h"
+
+namespace llvm {
+
+class ARMSubtarget;
+
+struct ARMRegisterInfo : public ARMBaseRegisterInfo {
+ virtual void anchor();
+public:
+ ARMRegisterInfo();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
new file mode 100644
index 0000000..02cbfb1
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -0,0 +1,430 @@
+//===-- ARMRegisterInfo.td - ARM Register defs -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the ARM register file
+//===----------------------------------------------------------------------===//
+
+// Registers are identified with 4-bit ID numbers.
+class ARMReg<bits<16> Enc, string n, list<Register> subregs = []> : Register<n> {
+ let HWEncoding = Enc;
+ let Namespace = "ARM";
+ let SubRegs = subregs;
+ // All bits of ARM registers with sub-registers are covered by sub-registers.
+ let CoveredBySubRegs = 1;
+}
+
+class ARMFReg<bits<16> Enc, string n> : Register<n> {
+ let HWEncoding = Enc;
+ let Namespace = "ARM";
+}
+
+// Subregister indices.
+let Namespace = "ARM" in {
+def qqsub_0 : SubRegIndex<256>;
+def qqsub_1 : SubRegIndex<256, 256>;
+
+// Note: Code depends on these having consecutive numbers.
+def qsub_0 : SubRegIndex<128>;
+def qsub_1 : SubRegIndex<128, 128>;
+def qsub_2 : ComposedSubRegIndex<qqsub_1, qsub_0>;
+def qsub_3 : ComposedSubRegIndex<qqsub_1, qsub_1>;
+
+def dsub_0 : SubRegIndex<64>;
+def dsub_1 : SubRegIndex<64, 64>;
+def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>;
+def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>;
+def dsub_4 : ComposedSubRegIndex<qsub_2, dsub_0>;
+def dsub_5 : ComposedSubRegIndex<qsub_2, dsub_1>;
+def dsub_6 : ComposedSubRegIndex<qsub_3, dsub_0>;
+def dsub_7 : ComposedSubRegIndex<qsub_3, dsub_1>;
+
+def ssub_0 : SubRegIndex<32>;
+def ssub_1 : SubRegIndex<32, 32>;
+def ssub_2 : ComposedSubRegIndex<dsub_1, ssub_0>;
+def ssub_3 : ComposedSubRegIndex<dsub_1, ssub_1>;
+
+def gsub_0 : SubRegIndex<32>;
+def gsub_1 : SubRegIndex<32, 32>;
+// Let TableGen synthesize the remaining 12 ssub_* indices.
+// We don't need to name them.
+}
+
+// Integer registers
+def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : ARMReg< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : ARMReg< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>;
+// These require 32-bit instructions.
+let CostPerUse = 1 in {
+def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
+def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>;
+def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>;
+def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>;
+}
+
+// Float registers
+def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
+def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">;
+def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">;
+def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">;
+def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">;
+def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
+def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
+def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
+def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
+def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
+def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
+def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
+def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
+def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
+def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
+def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+let SubRegIndices = [ssub_0, ssub_1] in {
+def D0 : ARMReg< 0, "d0", [S0, S1]>, DwarfRegNum<[256]>;
+def D1 : ARMReg< 1, "d1", [S2, S3]>, DwarfRegNum<[257]>;
+def D2 : ARMReg< 2, "d2", [S4, S5]>, DwarfRegNum<[258]>;
+def D3 : ARMReg< 3, "d3", [S6, S7]>, DwarfRegNum<[259]>;
+def D4 : ARMReg< 4, "d4", [S8, S9]>, DwarfRegNum<[260]>;
+def D5 : ARMReg< 5, "d5", [S10, S11]>, DwarfRegNum<[261]>;
+def D6 : ARMReg< 6, "d6", [S12, S13]>, DwarfRegNum<[262]>;
+def D7 : ARMReg< 7, "d7", [S14, S15]>, DwarfRegNum<[263]>;
+def D8 : ARMReg< 8, "d8", [S16, S17]>, DwarfRegNum<[264]>;
+def D9 : ARMReg< 9, "d9", [S18, S19]>, DwarfRegNum<[265]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>, DwarfRegNum<[266]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>, DwarfRegNum<[267]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>, DwarfRegNum<[268]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>, DwarfRegNum<[269]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>, DwarfRegNum<[270]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>;
+}
+
+// VFP3 defines 16 additional double registers
+def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>;
+def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>;
+def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>;
+def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>;
+def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>;
+def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>;
+def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>;
+def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>;
+def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>;
+def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>;
+def D26 : ARMFReg<26, "d26">, DwarfRegNum<[282]>;
+def D27 : ARMFReg<27, "d27">, DwarfRegNum<[283]>;
+def D28 : ARMFReg<28, "d28">, DwarfRegNum<[284]>;
+def D29 : ARMFReg<29, "d29">, DwarfRegNum<[285]>;
+def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>;
+def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>;
+
+// Advanced SIMD (NEON) defines 16 quad-word aliases
+let SubRegIndices = [dsub_0, dsub_1] in {
+def Q0 : ARMReg< 0, "q0", [D0, D1]>;
+def Q1 : ARMReg< 1, "q1", [D2, D3]>;
+def Q2 : ARMReg< 2, "q2", [D4, D5]>;
+def Q3 : ARMReg< 3, "q3", [D6, D7]>;
+def Q4 : ARMReg< 4, "q4", [D8, D9]>;
+def Q5 : ARMReg< 5, "q5", [D10, D11]>;
+def Q6 : ARMReg< 6, "q6", [D12, D13]>;
+def Q7 : ARMReg< 7, "q7", [D14, D15]>;
+}
+let SubRegIndices = [dsub_0, dsub_1] in {
+def Q8 : ARMReg< 8, "q8", [D16, D17]>;
+def Q9 : ARMReg< 9, "q9", [D18, D19]>;
+def Q10 : ARMReg<10, "q10", [D20, D21]>;
+def Q11 : ARMReg<11, "q11", [D22, D23]>;
+def Q12 : ARMReg<12, "q12", [D24, D25]>;
+def Q13 : ARMReg<13, "q13", [D26, D27]>;
+def Q14 : ARMReg<14, "q14", [D28, D29]>;
+def Q15 : ARMReg<15, "q15", [D30, D31]>;
+}
+
+// Current Program Status Register.
+// We model fpscr with two registers: FPSCR models the control bits and will be
+// reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV
+// models the APSR when it's accessed by some special instructions. In such cases
+// it has the same encoding as PC.
+def CPSR : ARMReg<0, "cpsr">;
+def APSR : ARMReg<1, "apsr">;
+def APSR_NZCV : ARMReg<15, "apsr_nzcv">;
+def SPSR : ARMReg<2, "spsr">;
+def FPSCR : ARMReg<3, "fpscr">;
+def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
+ let Aliases = [FPSCR];
+}
+def ITSTATE : ARMReg<4, "itstate">;
+
+// Special Registers - only available in privileged mode.
+def FPSID : ARMReg<0, "fpsid">;
+def MVFR2 : ARMReg<5, "mvfr2">;
+def MVFR1 : ARMReg<6, "mvfr1">;
+def MVFR0 : ARMReg<7, "mvfr0">;
+def FPEXC : ARMReg<8, "fpexc">;
+def FPINST : ARMReg<9, "fpinst">;
+def FPINST2 : ARMReg<10, "fpinst2">;
+
+// Register classes.
+//
+// pc == Program Counter
+// lr == Link Register
+// sp == Stack Pointer
+// r12 == ip (scratch)
+// r7 == Frame Pointer (thumb-style backtraces)
+// r9 == May be reserved as Thread Register
+// r11 == Frame Pointer (arm-style backtraces)
+// r10 == Stack Limit
+//
+def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
+ SP, LR, PC)> {
+ // Allocate LR as the first CSR since it is always saved anyway.
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
+ // know how to spill them. If we make our prologue/epilogue code smarter at
+ // some point, we can go back to using the above allocation orders for the
+ // Thumb1 instructions that know how to use hi regs.
+ let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// GPRs without the PC. Some ARM instructions do not allow the PC in
+// certain operand slots, particularly as the destination. Primarily
+// useful for disassembly.
+def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
+ let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// GPRs without the PC but with APSR. Some instructions allow accessing the
+// APSR, while actually encoding PC in the register field. This is useful
+// for assembly and disassembly only.
+def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)> {
+ let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the
+// implied SP argument list.
+// FIXME: It would be better to not use this at all and refactor the
+// instructions to not have SP an an explicit argument. That makes
+// frame index resolution a bit trickier, though.
+def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)>;
+
+// restricted GPR register class. Many Thumb2 instructions allow the full
+// register range for operands, but have undefined behaviours when PC
+// or SP (R13 or R15) are used. The ARM ISA refers to these operands
+// via the BadReg() pseudo-code description.
+def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
+ let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// Thumb registers are R0-R7 normally. Some instructions can still use
+// the general GPR register class above (MOV, e.g.)
+def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
+
+// The high registers in thumb mode, R8-R15.
+def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
+
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
+// this class and the preceding one(!) This is what we want.
+def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R12)> {
+ let AltOrders = [(and tcGPR, tGPR)];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
+
+// Scalar single precision floating point register class..
+// FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack
+// to avoid partial-write dependencies on D or Q (depending on platform)
+// registers (S registers are renamed as portions of D/Q registers).
+def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> {
+ let AltOrders = [(add (decimate SPR, 2), SPR),
+ (add (decimate SPR, 4),
+ (decimate SPR, 2),
+ (decimate (rotl SPR, 1), 4),
+ (decimate (rotl SPR, 1), 2))];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ }];
+}
+
+// Subset of SPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
+
+// Scalar double precision floating point / generic 64-bit vector register
+// class.
+// ARM requires only word alignment for double. It's more performant if it
+// is double-word alignment though.
+def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
+ (sequence "D%u", 0, 31)> {
+ // Allocate non-VFP2 registers D16-D31 first, and prefer even registers on
+ // Darwin platforms.
+ let AltOrders = [(rotl DPR, 16),
+ (add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ }];
+}
+
+// Subset of DPR that are accessible with VFP2 (and so that also have
+// 32-bit SPR subregs).
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
+ (trunc DPR, 16)>;
+
+// Subset of DPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
+ (trunc DPR, 8)>;
+
+// Generic 128-bit vector register class.
+def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
+ (sequence "Q%u", 0, 15)> {
+ // Allocate non-VFP2 aliases Q8-Q15 first.
+ let AltOrders = [(rotl QPR, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Subset of QPR that have 32-bit SPR subregs.
+def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (trunc QPR, 8)>;
+
+// Subset of QPR that have DPR_8 and SPR_8 subregs.
+def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (trunc QPR, 4)>;
+
+// Pseudo-registers representing odd-even pairs of D registers. The even-odd
+// pairs are already represented by the Q registers.
+// These are needed by NEON instructions requiring two consecutive D registers.
+// There is no D31_D0 register as that is always an UNPREDICTABLE encoding.
+def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1],
+ [(decimate (shl DPR, 1), 2),
+ (decimate (shl DPR, 2), 2)]>;
+
+// Register class representing a pair of consecutive D registers.
+// Use the Q registers for the even-odd pairs.
+def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (interleave QPR, TuplesOE2D)> {
+ // Allocate starting at non-VFP2 registers D16-D31 first.
+ // Prefer even-odd pairs as they are easier to copy.
+ let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16))];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Pseudo-registers representing even-odd pairs of GPRs from R1 to R13/SP.
+// These are needed by instructions (e.g. ldrexd/strexd) requiring even-odd GPRs.
+def Tuples2R : RegisterTuples<[gsub_0, gsub_1],
+ [(add R0, R2, R4, R6, R8, R10, R12),
+ (add R1, R3, R5, R7, R9, R11, SP)]>;
+
+// Register class representing a pair of even-odd GPRs.
+def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2R)> {
+ let Size = 64; // 2 x 32 bits, we have no predefined type of that size.
+}
+
+// Pseudo-registers representing 3 consecutive D registers.
+def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2],
+ [(shl DPR, 0),
+ (shl DPR, 1),
+ (shl DPR, 2)]>;
+
+// 3 consecutive D registers.
+def DTriple : RegisterClass<"ARM", [untyped], 64, (add Tuples3D)> {
+ let Size = 192; // 3 x 64 bits, we have no predefined type of that size.
+}
+
+// Pseudo 256-bit registers to represent pairs of Q registers. These should
+// never be present in the emitted code.
+// These are used for NEON load / store instructions, e.g., vld4, vst3.
+def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], [(shl QPR, 0), (shl QPR, 1)]>;
+
+// Pseudo 256-bit vector register class to model pairs of Q registers
+// (4 consecutive D registers).
+def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> {
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQPR, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Tuples of 4 D regs that isn't also a pair of Q regs.
+def TuplesOE4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3],
+ [(decimate (shl DPR, 1), 2),
+ (decimate (shl DPR, 2), 2),
+ (decimate (shl DPR, 3), 2),
+ (decimate (shl DPR, 4), 2)]>;
+
+// 4 consecutive D registers.
+def DQuad : RegisterClass<"ARM", [v4i64], 256,
+ (interleave Tuples2Q, TuplesOE4D)>;
+
+// Pseudo 512-bit registers to represent four consecutive Q registers.
+def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1],
+ [(shl QQPR, 0), (shl QQPR, 2)]>;
+
+// Pseudo 512-bit vector register class to model 4 consecutive Q registers
+// (8 consecutive D registers).
+def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> {
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQQQPR, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+
+// Pseudo-registers representing 2-spaced consecutive D registers.
+def Tuples2DSpc : RegisterTuples<[dsub_0, dsub_2],
+ [(shl DPR, 0),
+ (shl DPR, 2)]>;
+
+// Spaced pairs of D registers.
+def DPairSpc : RegisterClass<"ARM", [v2i64], 64, (add Tuples2DSpc)>;
+
+def Tuples3DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4],
+ [(shl DPR, 0),
+ (shl DPR, 2),
+ (shl DPR, 4)]>;
+
+// Spaced triples of D registers.
+def DTripleSpc : RegisterClass<"ARM", [untyped], 64, (add Tuples3DSpc)> {
+ let Size = 192; // 3 x 64 bits, we have no predefined type of that size.
+}
+
+def Tuples4DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4, dsub_6],
+ [(shl DPR, 0),
+ (shl DPR, 2),
+ (shl DPR, 4),
+ (shl DPR, 6)]>;
+
+// Spaced quads of D registers.
+def DQuadSpc : RegisterClass<"ARM", [v4i64], 64, (add Tuples3DSpc)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
new file mode 100644
index 0000000..528c4ec
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
@@ -0,0 +1,354 @@
+//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Instruction scheduling annotations for out-of-order CPUs.
+// These annotations are independent of the itinerary class defined below.
+// Here we define the subtarget independent read/write per-operand resources.
+// The subtarget schedule definitions will then map these to the subtarget's
+// resource usages.
+// For example:
+// The instruction cycle timings table might contain an entry for an operation
+// like the following:
+// Rd <- ADD Rn, Rm, <shift> Rs
+// Uops | Latency from register | Uops - resource requirements - latency
+// 2 | Rn: 1 Rm: 4 Rs: 4 | uop T0, Rm, Rs - P01 - 3
+// | | uopc Rd, Rn, T0 - P01 - 1
+// This is telling us that the result will be available in destination register
+// Rd after a minimum of three cycles after the result in Rm and Rs is available
+// and one cycle after the result in Rn is available. The micro-ops can execute
+// on resource P01.
+// To model this, we need to express that we need to dispatch two micro-ops,
+// that the resource P01 is needed and that the latency to Rn is different than
+// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by
+// two.
+// We will do this by assigning (abstract) resources to register defs/uses.
+// ARMSchedule.td:
+// def WriteALUsr : SchedWrite;
+// def ReadAdvanceALUsr : ScheRead;
+//
+// ARMInstrInfo.td:
+// def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
+// ReadDefault]> { ...}
+// ReadAdvance read resources allow us to define "pipeline by-passes" or
+// shorter latencies to certain registers as needed in the example above.
+// The "ReadDefault" can be omitted.
+// Next, the subtarget td file assigns resources to the abstract resources
+// defined here.
+// ARMScheduleSubtarget.td:
+// // Resources.
+// def P01 : ProcResource<3>; // ALU unit (3 of it).
+// ...
+// // Resource usages.
+// def : WriteRes<WriteALUsr, [P01, P01]> {
+// Latency = 4; // Latency of 4.
+// NumMicroOps = 2; // Dispatch 2 micro-ops.
+// // The two instances of resource P01 are occupied for one cycle. It is one
+// // cycle because these resources happen to be pipelined.
+// ResourceCycles = [1, 1];
+// }
+// def : ReadAdvance<ReadAdvanceALUsr, 3>;
+
+// Basic ALU operation.
+def WriteALU : SchedWrite;
+def ReadALU : SchedRead;
+
+// Basic ALU with shifts.
+def WriteALUsi : SchedWrite; // Shift by immediate.
+def WriteALUsr : SchedWrite; // Shift by register.
+def WriteALUSsr : SchedWrite; // Shift by register (flag setting).
+def ReadALUsr : SchedRead; // Some operands are read later.
+
+// Compares.
+def WriteCMP : SchedWrite;
+def WriteCMPsi : SchedWrite;
+def WriteCMPsr : SchedWrite;
+
+// Division.
+def WriteDiv : SchedWrite;
+
+// Loads.
+def WriteLd : SchedWrite;
+def WritePreLd : SchedWrite;
+
+// Branches.
+def WriteBr : SchedWrite;
+def WriteBrL : SchedWrite;
+def WriteBrTbl : SchedWrite;
+
+// Fixpoint conversions.
+def WriteCvtFP : SchedWrite;
+
+// Noop.
+def WriteNoop : SchedWrite;
+
+// Define TII for use in SchedVariant Predicates.
+def : PredicateProlog<[{
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
+ (void)TII;
+}]>;
+
+def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(MI)}]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for ARM
+//
+def IIC_iALUx : InstrItinClass;
+def IIC_iALUi : InstrItinClass;
+def IIC_iALUr : InstrItinClass;
+def IIC_iALUsi : InstrItinClass;
+def IIC_iALUsir : InstrItinClass;
+def IIC_iALUsr : InstrItinClass;
+def IIC_iBITi : InstrItinClass;
+def IIC_iBITr : InstrItinClass;
+def IIC_iBITsi : InstrItinClass;
+def IIC_iBITsr : InstrItinClass;
+def IIC_iUNAr : InstrItinClass;
+def IIC_iUNAsi : InstrItinClass;
+def IIC_iEXTr : InstrItinClass;
+def IIC_iEXTAr : InstrItinClass;
+def IIC_iEXTAsr : InstrItinClass;
+def IIC_iCMPi : InstrItinClass;
+def IIC_iCMPr : InstrItinClass;
+def IIC_iCMPsi : InstrItinClass;
+def IIC_iCMPsr : InstrItinClass;
+def IIC_iTSTi : InstrItinClass;
+def IIC_iTSTr : InstrItinClass;
+def IIC_iTSTsi : InstrItinClass;
+def IIC_iTSTsr : InstrItinClass;
+def IIC_iMOVi : InstrItinClass;
+def IIC_iMOVr : InstrItinClass;
+def IIC_iMOVsi : InstrItinClass;
+def IIC_iMOVsr : InstrItinClass;
+def IIC_iMOVix2 : InstrItinClass;
+def IIC_iMOVix2addpc : InstrItinClass;
+def IIC_iMOVix2ld : InstrItinClass;
+def IIC_iMVNi : InstrItinClass;
+def IIC_iMVNr : InstrItinClass;
+def IIC_iMVNsi : InstrItinClass;
+def IIC_iMVNsr : InstrItinClass;
+def IIC_iCMOVi : InstrItinClass;
+def IIC_iCMOVr : InstrItinClass;
+def IIC_iCMOVsi : InstrItinClass;
+def IIC_iCMOVsr : InstrItinClass;
+def IIC_iCMOVix2 : InstrItinClass;
+def IIC_iMUL16 : InstrItinClass;
+def IIC_iMAC16 : InstrItinClass;
+def IIC_iMUL32 : InstrItinClass;
+def IIC_iMAC32 : InstrItinClass;
+def IIC_iMUL64 : InstrItinClass;
+def IIC_iMAC64 : InstrItinClass;
+def IIC_iDIV : InstrItinClass;
+def IIC_iLoad_i : InstrItinClass;
+def IIC_iLoad_r : InstrItinClass;
+def IIC_iLoad_si : InstrItinClass;
+def IIC_iLoad_iu : InstrItinClass;
+def IIC_iLoad_ru : InstrItinClass;
+def IIC_iLoad_siu : InstrItinClass;
+def IIC_iLoad_bh_i : InstrItinClass;
+def IIC_iLoad_bh_r : InstrItinClass;
+def IIC_iLoad_bh_si : InstrItinClass;
+def IIC_iLoad_bh_iu : InstrItinClass;
+def IIC_iLoad_bh_ru : InstrItinClass;
+def IIC_iLoad_bh_siu : InstrItinClass;
+def IIC_iLoad_d_i : InstrItinClass;
+def IIC_iLoad_d_r : InstrItinClass;
+def IIC_iLoad_d_ru : InstrItinClass;
+def IIC_iLoad_m : InstrItinClass;
+def IIC_iLoad_mu : InstrItinClass;
+def IIC_iLoad_mBr : InstrItinClass;
+def IIC_iPop : InstrItinClass;
+def IIC_iPop_Br : InstrItinClass;
+def IIC_iLoadiALU : InstrItinClass;
+def IIC_iStore_i : InstrItinClass;
+def IIC_iStore_r : InstrItinClass;
+def IIC_iStore_si : InstrItinClass;
+def IIC_iStore_iu : InstrItinClass;
+def IIC_iStore_ru : InstrItinClass;
+def IIC_iStore_siu : InstrItinClass;
+def IIC_iStore_bh_i : InstrItinClass;
+def IIC_iStore_bh_r : InstrItinClass;
+def IIC_iStore_bh_si : InstrItinClass;
+def IIC_iStore_bh_iu : InstrItinClass;
+def IIC_iStore_bh_ru : InstrItinClass;
+def IIC_iStore_bh_siu : InstrItinClass;
+def IIC_iStore_d_i : InstrItinClass;
+def IIC_iStore_d_r : InstrItinClass;
+def IIC_iStore_d_ru : InstrItinClass;
+def IIC_iStore_m : InstrItinClass;
+def IIC_iStore_mu : InstrItinClass;
+def IIC_Preload : InstrItinClass;
+def IIC_Br : InstrItinClass;
+def IIC_fpSTAT : InstrItinClass;
+def IIC_fpUNA32 : InstrItinClass;
+def IIC_fpUNA64 : InstrItinClass;
+def IIC_fpCMP32 : InstrItinClass;
+def IIC_fpCMP64 : InstrItinClass;
+def IIC_fpCVTSD : InstrItinClass;
+def IIC_fpCVTDS : InstrItinClass;
+def IIC_fpCVTSH : InstrItinClass;
+def IIC_fpCVTHS : InstrItinClass;
+def IIC_fpCVTIS : InstrItinClass;
+def IIC_fpCVTID : InstrItinClass;
+def IIC_fpCVTSI : InstrItinClass;
+def IIC_fpCVTDI : InstrItinClass;
+def IIC_fpMOVIS : InstrItinClass;
+def IIC_fpMOVID : InstrItinClass;
+def IIC_fpMOVSI : InstrItinClass;
+def IIC_fpMOVDI : InstrItinClass;
+def IIC_fpALU32 : InstrItinClass;
+def IIC_fpALU64 : InstrItinClass;
+def IIC_fpMUL32 : InstrItinClass;
+def IIC_fpMUL64 : InstrItinClass;
+def IIC_fpMAC32 : InstrItinClass;
+def IIC_fpMAC64 : InstrItinClass;
+def IIC_fpFMAC32 : InstrItinClass;
+def IIC_fpFMAC64 : InstrItinClass;
+def IIC_fpDIV32 : InstrItinClass;
+def IIC_fpDIV64 : InstrItinClass;
+def IIC_fpSQRT32 : InstrItinClass;
+def IIC_fpSQRT64 : InstrItinClass;
+def IIC_fpLoad32 : InstrItinClass;
+def IIC_fpLoad64 : InstrItinClass;
+def IIC_fpLoad_m : InstrItinClass;
+def IIC_fpLoad_mu : InstrItinClass;
+def IIC_fpStore32 : InstrItinClass;
+def IIC_fpStore64 : InstrItinClass;
+def IIC_fpStore_m : InstrItinClass;
+def IIC_fpStore_mu : InstrItinClass;
+def IIC_VLD1 : InstrItinClass;
+def IIC_VLD1x2 : InstrItinClass;
+def IIC_VLD1x3 : InstrItinClass;
+def IIC_VLD1x4 : InstrItinClass;
+def IIC_VLD1u : InstrItinClass;
+def IIC_VLD1x2u : InstrItinClass;
+def IIC_VLD1x3u : InstrItinClass;
+def IIC_VLD1x4u : InstrItinClass;
+def IIC_VLD1ln : InstrItinClass;
+def IIC_VLD1lnu : InstrItinClass;
+def IIC_VLD1dup : InstrItinClass;
+def IIC_VLD1dupu : InstrItinClass;
+def IIC_VLD2 : InstrItinClass;
+def IIC_VLD2x2 : InstrItinClass;
+def IIC_VLD2u : InstrItinClass;
+def IIC_VLD2x2u : InstrItinClass;
+def IIC_VLD2ln : InstrItinClass;
+def IIC_VLD2lnu : InstrItinClass;
+def IIC_VLD2dup : InstrItinClass;
+def IIC_VLD2dupu : InstrItinClass;
+def IIC_VLD3 : InstrItinClass;
+def IIC_VLD3ln : InstrItinClass;
+def IIC_VLD3u : InstrItinClass;
+def IIC_VLD3lnu : InstrItinClass;
+def IIC_VLD3dup : InstrItinClass;
+def IIC_VLD3dupu : InstrItinClass;
+def IIC_VLD4 : InstrItinClass;
+def IIC_VLD4ln : InstrItinClass;
+def IIC_VLD4u : InstrItinClass;
+def IIC_VLD4lnu : InstrItinClass;
+def IIC_VLD4dup : InstrItinClass;
+def IIC_VLD4dupu : InstrItinClass;
+def IIC_VST1 : InstrItinClass;
+def IIC_VST1x2 : InstrItinClass;
+def IIC_VST1x3 : InstrItinClass;
+def IIC_VST1x4 : InstrItinClass;
+def IIC_VST1u : InstrItinClass;
+def IIC_VST1x2u : InstrItinClass;
+def IIC_VST1x3u : InstrItinClass;
+def IIC_VST1x4u : InstrItinClass;
+def IIC_VST1ln : InstrItinClass;
+def IIC_VST1lnu : InstrItinClass;
+def IIC_VST2 : InstrItinClass;
+def IIC_VST2x2 : InstrItinClass;
+def IIC_VST2u : InstrItinClass;
+def IIC_VST2x2u : InstrItinClass;
+def IIC_VST2ln : InstrItinClass;
+def IIC_VST2lnu : InstrItinClass;
+def IIC_VST3 : InstrItinClass;
+def IIC_VST3u : InstrItinClass;
+def IIC_VST3ln : InstrItinClass;
+def IIC_VST3lnu : InstrItinClass;
+def IIC_VST4 : InstrItinClass;
+def IIC_VST4u : InstrItinClass;
+def IIC_VST4ln : InstrItinClass;
+def IIC_VST4lnu : InstrItinClass;
+def IIC_VUNAD : InstrItinClass;
+def IIC_VUNAQ : InstrItinClass;
+def IIC_VBIND : InstrItinClass;
+def IIC_VBINQ : InstrItinClass;
+def IIC_VPBIND : InstrItinClass;
+def IIC_VFMULD : InstrItinClass;
+def IIC_VFMULQ : InstrItinClass;
+def IIC_VMOV : InstrItinClass;
+def IIC_VMOVImm : InstrItinClass;
+def IIC_VMOVD : InstrItinClass;
+def IIC_VMOVQ : InstrItinClass;
+def IIC_VMOVIS : InstrItinClass;
+def IIC_VMOVID : InstrItinClass;
+def IIC_VMOVISL : InstrItinClass;
+def IIC_VMOVSI : InstrItinClass;
+def IIC_VMOVDI : InstrItinClass;
+def IIC_VMOVN : InstrItinClass;
+def IIC_VPERMD : InstrItinClass;
+def IIC_VPERMQ : InstrItinClass;
+def IIC_VPERMQ3 : InstrItinClass;
+def IIC_VMACD : InstrItinClass;
+def IIC_VMACQ : InstrItinClass;
+def IIC_VFMACD : InstrItinClass;
+def IIC_VFMACQ : InstrItinClass;
+def IIC_VRECSD : InstrItinClass;
+def IIC_VRECSQ : InstrItinClass;
+def IIC_VCNTiD : InstrItinClass;
+def IIC_VCNTiQ : InstrItinClass;
+def IIC_VUNAiD : InstrItinClass;
+def IIC_VUNAiQ : InstrItinClass;
+def IIC_VQUNAiD : InstrItinClass;
+def IIC_VQUNAiQ : InstrItinClass;
+def IIC_VBINiD : InstrItinClass;
+def IIC_VBINiQ : InstrItinClass;
+def IIC_VSUBiD : InstrItinClass;
+def IIC_VSUBiQ : InstrItinClass;
+def IIC_VBINi4D : InstrItinClass;
+def IIC_VBINi4Q : InstrItinClass;
+def IIC_VSUBi4D : InstrItinClass;
+def IIC_VSUBi4Q : InstrItinClass;
+def IIC_VABAD : InstrItinClass;
+def IIC_VABAQ : InstrItinClass;
+def IIC_VSHLiD : InstrItinClass;
+def IIC_VSHLiQ : InstrItinClass;
+def IIC_VSHLi4D : InstrItinClass;
+def IIC_VSHLi4Q : InstrItinClass;
+def IIC_VPALiD : InstrItinClass;
+def IIC_VPALiQ : InstrItinClass;
+def IIC_VMULi16D : InstrItinClass;
+def IIC_VMULi32D : InstrItinClass;
+def IIC_VMULi16Q : InstrItinClass;
+def IIC_VMULi32Q : InstrItinClass;
+def IIC_VMACi16D : InstrItinClass;
+def IIC_VMACi32D : InstrItinClass;
+def IIC_VMACi16Q : InstrItinClass;
+def IIC_VMACi32Q : InstrItinClass;
+def IIC_VEXTD : InstrItinClass;
+def IIC_VEXTQ : InstrItinClass;
+def IIC_VTB1 : InstrItinClass;
+def IIC_VTB2 : InstrItinClass;
+def IIC_VTB3 : InstrItinClass;
+def IIC_VTB4 : InstrItinClass;
+def IIC_VTBX1 : InstrItinClass;
+def IIC_VTBX2 : InstrItinClass;
+def IIC_VTBX3 : InstrItinClass;
+def IIC_VTBX4 : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+include "ARMScheduleV6.td"
+include "ARMScheduleA8.td"
+include "ARMScheduleA9.td"
+include "ARMScheduleSwift.td"
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
new file mode 100644
index 0000000..2c63825
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
@@ -0,0 +1,1075 @@
+//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A8 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
+// Functional Units.
+def A8_Pipe0 : FuncUnit; // pipeline 0
+def A8_Pipe1 : FuncUnit; // pipeline 1
+def A8_LSPipe : FuncUnit; // Load / store pipeline
+def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe
+def A8_NLSPipe : FuncUnit; // NEON LS pipe
+//
+// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
+//
+def CortexA8Itineraries : ProcessorItineraries<
+ [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
+ [], [
+ // Two fully-pipelined integer ALU pipelines
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LSPipe]>], [5]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+
+ // Integer multiply pipeline
+ // Result written in E5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ // FIXME: lsl by 2 takes 1 cycle.
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ //
+ // Load multiple, def is the 5th operand. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 2, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Push, def is the 3th operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
+
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Store multiple. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [], [], -1>, // dynamic uops
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2], [], -1>, // dynamic uops
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<7, [A8_NPipe], 0>,
+ InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NPipe], 0>,
+ InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<9, [A8_NPipe], 0>,
+ InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<11, [A8_NPipe], 0>,
+ InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
+ //
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<20, [A8_NPipe], 0>,
+ InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 20, 1]>,
+
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1, 1, 2], [], -1>, // dynamic uops
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1, 1, 1, 2], [], -1>, // dynamic uops
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1], [], -1>, // dynamic uops
+ // NEON
+ // Issue through integer pipeline, and execute in NEON unit.
+ //
+ // VLD1
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1]>,
+ //
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1]>,
+ //
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1, 1]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 2]>,
+ //
+ // Double-register FP Binary
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
+
+ //
+ // Quad-register FP Binary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Quad-register Permute Move
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [2, 1]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
+ //
+ // Double-register Fused FP Multiple-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
+ //
+ // Quad-register Fused FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
+ //
+ // Double-register Reciprical Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
+ //
+ // Quad-register Reciprical Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Quad-register Integer CountQ-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// This following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Cortex-A8 machine model for scheduling and other instruction cost heuristics.
+def CortexA8Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 2; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 13; // Based on estimate of pipeline depth.
+
+ let Itineraries = CortexA8Itineraries;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
new file mode 100644
index 0000000..9a1d222
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
@@ -0,0 +1,2529 @@
+//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A9 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. This is
+// required until SD and PostRA schedulers are replaced by MachineScheduler.
+
+//
+// Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
+// Reference Manual".
+//
+// Functional units
+def A9_Issue0 : FuncUnit; // Issue 0
+def A9_Issue1 : FuncUnit; // Issue 1
+def A9_Branch : FuncUnit; // Branch
+def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0
+def A9_ALU1 : FuncUnit; // ALU pipeline 1
+def A9_AGU : FuncUnit; // Address generation unit for ld / st
+def A9_NPipe : FuncUnit; // NEON pipeline
+def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
+def A9_LSUnit : FuncUnit; // L/S Unit
+def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
+def A9_DRegsN : FuncUnit; // FP register set, NEON side
+
+// Bypasses
+def A9_LdBypass : Bypass;
+
+def CortexA9Itineraries : ProcessorItineraries<
+ [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
+ A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
+ [A9_LdBypass], [
+ // Two fully-pipelined integer ALU pipelines
+
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [5]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1, 1],
+ [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+
+ // CLZ, RBIT, etc.
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+
+ // BFC, BFI, UBFX, SBFX
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1], [A9_LdBypass]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ // FIXME: Correctly model the extra input dep on the destination.
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [4, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>],
+ [4, 5, 1, 1]>,
+ // Integer load pipeline
+ // FIXME: The timings are some rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1], [A9_LdBypass]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1], [A9_LdBypass]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit], 0>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 1, 1], [A9_LdBypass]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1], [A9_LdBypass]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 4, 1, 1], [A9_LdBypass]>,
+ //
+ // Load multiple, def is the 5th operand.
+ // FIXME: This assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 2, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+
+ // Integer store pipeline
+ ///
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [], [], -1>, // dynamic uops
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2], [], -1>, // dynamic uops
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
+ InstrStage<1, [A9_Issue1], 0>,
+ InstrStage<1, [A9_Branch]>]>,
+
+ // VFP and NEON shares the same register file. This means that every VFP
+ // instruction should wait for full completion of the consecutive NEON
+ // instruction and vice-versa. We model this behavior with two artificial FUs:
+ // DRegsVFP and DRegsVFP.
+ //
+ // Every VFP instruction:
+ // - Acquires DRegsVFP resource for 1 cycle
+ // - Reserves DRegsN resource for the whole duration (including time to
+ // register file writeback!).
+ // Every NEON instruction does the same but with FUs swapped.
+ //
+ // Since the reserved FU cannot be acquired, this models precisely
+ // "cross-domain" stalls.
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit.
+
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+
+ //
+ // Single to Half FP Convert
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Half to Single FP Convert
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<6, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<7, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<9, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [8, 1, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<10, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [9, 1, 1, 1]>,
+ //
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<9, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [8, 1, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<10, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [9, 1, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<16, [A9_DRegsN], 0, Reserved>,
+ InstrStage<10, [A9_NPipe]>],
+ [15, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<26, [A9_DRegsN], 0, Reserved>,
+ InstrStage<20, [A9_NPipe]>],
+ [25, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<18, [A9_DRegsN], 0, Reserved>,
+ InstrStage<13, [A9_NPipe]>],
+ [17, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<33, [A9_DRegsN], 0, Reserved>,
+ InstrStage<28, [A9_NPipe]>],
+ [32, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ //
+ // On A9 move-from-VFP is free to issue with no stall if other VFP
+ // operations are in flight. I assume it still can't dual-issue though.
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>],
+ [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ //
+ // On A9 move-from-VFP is free to issue with no stall if other VFP
+ // operations are in flight. I assume it still can't dual-issue though.
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>],
+ [2, 1, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Load
+ // FIXME: Result latency is 1 if address is 64-bit aligned.
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
+ //
+ // FP Load Multiple + update
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
+ //
+ // FP Store Multiple + update
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
+ // NEON
+ // VLD1
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 2, 1]>,
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1, 1]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer CountQ-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
+
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 2]>,
+
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 3, 2, 1]>,
+
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1,1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Quad-register Permute Move
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 1]>,
+
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 2]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 1]>,
+ //
+ // Quad-register FP Binary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [8, 4, 2, 1]>,
+ //
+ // Double-register Fused FP Multiple-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register Fused FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [8, 4, 2, 1]>,
+ //
+ // Double-register Reciprical Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 10 cycles
+ InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [9, 2, 2]>,
+ //
+ // Quad-register Reciprical Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 11 cycles
+ InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [10, 2, 2]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 4, 1, 1]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 3, 1]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler and will eventually replace itineraries.
+
+class A9WriteLMOpsListType<list<WriteSequence> writes> {
+ list <WriteSequence> Writes = writes;
+ SchedMachineModel SchedModel = ?;
+}
+
+// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
+def CortexA9Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MicroOpBufferSize = 56; // Based on available renamed registers.
+ let LoadLatency = 2; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 8; // Based on estimate of pipeline depth.
+
+ let Itineraries = CortexA9Itineraries;
+
+ // FIXME: Many vector operations were never given an itinerary. We
+ // haven't mapped these to the new model either.
+ let CompleteModel = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+//
+// The AGU unit has BufferSize=1 so that the latency between operations
+// that use it are considered to stall other operations.
+//
+// The FP unit has BufferSize=0 so that it is a hard dispatch
+// hazard. No instruction may be dispatched while the unit is reserved.
+
+let SchedModel = CortexA9Model in {
+
+def A9UnitALU : ProcResource<2>;
+def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
+def A9UnitAGU : ProcResource<1> { let BufferSize = 1; }
+def A9UnitLS : ProcResource<1>;
+def A9UnitFP : ProcResource<1> { let BufferSize = 0; }
+def A9UnitB : ProcResource<1>;
+
+//===----------------------------------------------------------------------===//
+// Define scheduler read/write types with their resources and latency on A9.
+
+// Consume an issue slot, but no processor resources. This is useful when all
+// other writes associated with the operand have NumMicroOps = 0.
+def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; }
+
+// Write an integer register.
+def A9WriteI : SchedWriteRes<[A9UnitALU]>;
+// Write an integer shifted-by register
+def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
+
+// Basic ALU.
+def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
+// ALU with operand shifted by immediate.
+def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
+// ALU with operand shifted by register.
+def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
+
+// Multiplication
+def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
+def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
+ let NumMicroOps = 0; }
+def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
+def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
+ let NumMicroOps = 0; }
+
+// Floating-point
+// Only one FP or AGU instruction may issue per cycle. We model this
+// by having FP instructions consume the AGU resource.
+def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
+def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
+def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
+def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
+def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
+def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
+def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
+def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
+def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; }
+def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; }
+
+// NEON has an odd mix of latencies. Simply name the write types by latency.
+def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
+def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
+def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; }
+def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
+def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
+def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
+def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
+def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
+def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
+
+// Reserve A9UnitFP for 2 consecutive cycles.
+def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+}
+def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
+ let Latency = 7;
+ let ResourceCycles = [2];
+}
+def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
+ let Latency = 9;
+ let ResourceCycles = [2];
+}
+
+// Branches don't have a def operand but still consume resources.
+def A9WriteB : SchedWriteRes<[A9UnitB]>;
+
+// Address generation.
+def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
+
+// Load Integer.
+def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
+// Load the upper 32-bits using the same micro-op.
+def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
+ let NumMicroOps = 0; }
+// Offset shifted by register.
+def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
+// Load (and zero extend) a byte.
+def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
+def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; }
+
+// Load or Store Float, aligned.
+def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; }
+
+// Store Integer.
+def A9WriteS : SchedWriteRes<[A9UnitLS]>;
+
+//===----------------------------------------------------------------------===//
+// Define resources dynamically for load multiple variants.
+
+// Define helpers for extra latency without consuming resources.
+def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
+foreach NumCycles = 2-8 in {
+def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
+} // foreach NumCycles
+
+// Define address generation sequences and predicates for 8 flavors of LDMs.
+foreach NumAddr = 1-8 in {
+
+// Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
+// latency for instructions that generate multiple loads or stores.
+def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
+
+// Define a predicate to select the LDM based on number of memory addresses.
+def A9LMAdr#NumAddr#Pred :
+ SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>;
+
+} // foreach NumAddr
+
+// Fall-back for unknown LDMs.
+def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">;
+
+// LDM/VLDM/VLDn address generation latency & resources.
+// Dynamically select the A9WriteAdrN sequence using a predicate.
+def A9WriteLMAdr : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
+ // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
+ SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>;
+
+// Define LDM Resources.
+// These take no issue resource, so they can be combined with other
+// writes like WriteB.
+// A9WriteLMLo takes a single LS resource and 2 cycles.
+def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2;
+ let NumMicroOps = 0; }
+// Assuming aligned access, the upper half of each pair is free with
+// the same latency.
+def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2;
+ let NumMicroOps = 0; }
+// Each A9WriteL#N variant adds N cycles of latency without consuming
+// additional resources.
+foreach NumAddr = 1-8 in {
+def A9WriteL#NumAddr : WriteSequence<
+ [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+def A9WriteL#NumAddr#Hi : WriteSequence<
+ [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// LDM: Load multiple into 32-bit integer registers.
+
+def A9WriteLMOpsList : A9WriteLMOpsListType<
+ [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi,
+ A9WriteL7, A9WriteL7Hi,
+ A9WriteL8, A9WriteL8Hi]>;
+
+// A9WriteLM variants expand into a pair of writes for each 64-bit
+// value loaded. When the number of registers is odd, the last
+// A9WriteLnHi is naturally ignored because the instruction has no
+// following def operands. These variants take no issue resource, so
+// they may need to be part of a WriteSequence that includes A9WriteIssue.
+def A9WriteLM : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
+ SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
+ SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
+ SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
+ SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
+ SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
+ SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
+ SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
+ // For unknown LDMs, define the maximum number of writes, but only
+ // make the first two consume resources.
+ SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3Hi, A9WriteL3Hi,
+ A9WriteL4Hi, A9WriteL4Hi,
+ A9WriteL5Hi, A9WriteL5Hi,
+ A9WriteL6Hi, A9WriteL6Hi,
+ A9WriteL7Hi, A9WriteL7Hi,
+ A9WriteL8Hi, A9WriteL8Hi]>]> {
+ let Variadic = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.
+
+// A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources
+// so can be used in WriteSequences for in single-issue instructions that
+// encapsulate multiple loads.
+def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+}
+
+foreach NumAddr = 1-8 in {
+
+// Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
+def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;
+
+// A9WriteLfp1-8 definitions are statically expanded into a sequence of
+// A9WriteLfpOps with additive latency that takes a single issue slot.
+// Used directly to describe NEON VLDn.
+def A9WriteLfp#NumAddr : WriteSequence<
+ [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
+
+// A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
+// permuting loaded values.
+def A9WriteLfp#NumAddr#Mov : WriteSequence<
+ [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
+
+} // foreach NumAddr
+
+// Define VLDM/VSTM PreRA resources.
+// A9WriteLMfpPreRA are dynamically expanded into the correct
+// A9WriteLfp1-8 sequence based on a predicate. This supports the
+// preRA VLDM variants in which all 64-bit loads are written to the
+// same tuple of either single or double precision registers.
+def A9WriteLMfpPreRA : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
+ // For unknown VLDM/VSTM PreRA, assume 2xS registers.
+ SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>;
+
+// Define VLDM/VSTM PostRA Resources.
+// A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
+def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; }
+
+foreach NumAddr = 1-8 in {
+
+// Each A9WriteL#N variant adds N cycles of latency without consuming
+// additional resources.
+def A9WriteLMfp#NumAddr : WriteSequence<
+ [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+
+// Assuming aligned access, the upper half of each pair is free with
+// the same latency.
+def A9WriteLMfp#NumAddr#Hi : WriteSequence<
+ [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+
+} // foreach NumAddr
+
+// VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
+// pair of writes for each 64-bit data loaded. When the number of
+// registers is odd, the last WriteLMfpnHi is naturally ignored because
+// the instruction has no following def operands.
+
+def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<
+ [A9WriteLMfp1, A9WriteLMfp2, // 0-1
+ A9WriteLMfp3, A9WriteLMfp4, // 2-3
+ A9WriteLMfp5, A9WriteLMfp6, // 4-5
+ A9WriteLMfp7, A9WriteLMfp8, // 6-7
+ A9WriteLMfp1Hi, // 8-8
+ A9WriteLMfp2Hi, A9WriteLMfp2Hi, // 9-10
+ A9WriteLMfp3Hi, A9WriteLMfp3Hi, // 11-12
+ A9WriteLMfp4Hi, A9WriteLMfp4Hi, // 13-14
+ A9WriteLMfp5Hi, A9WriteLMfp5Hi, // 15-16
+ A9WriteLMfp6Hi, A9WriteLMfp6Hi, // 17-18
+ A9WriteLMfp7Hi, A9WriteLMfp7Hi, // 19-20
+ A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
+
+def A9WriteLMfpPostRA : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
+ SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
+ SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
+ SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
+ SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
+ SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
+ SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
+ SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
+ // For unknown LDMs, define the maximum number of writes, but only
+ // make the first two consume resources. We are optimizing for the case
+ // where the operands are DPRs, and this determines the first eight
+ // types. The remaining eight types are filled to cover the case
+ // where the operands are SPRs.
+ SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2,
+ A9WriteLMfp3Hi, A9WriteLMfp4Hi,
+ A9WriteLMfp5Hi, A9WriteLMfp6Hi,
+ A9WriteLMfp7Hi, A9WriteLMfp8Hi,
+ A9WriteLMfp5Hi, A9WriteLMfp5Hi,
+ A9WriteLMfp6Hi, A9WriteLMfp6Hi,
+ A9WriteLMfp7Hi, A9WriteLMfp7Hi,
+ A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> {
+ let Variadic = 1;
+}
+
+// Distinguish between our multiple MI-level forms of the same
+// VLDM/VSTM instructions.
+def A9PreRA : SchedPredicate<
+ "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">;
+def A9PostRA : SchedPredicate<
+ "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">;
+
+// VLDM represents all destination registers as a single register
+// tuple, unlike LDM. So the number of write operands is not variadic.
+def A9WriteLMfp : SchedWriteVariant<[
+ SchedVar<A9PreRA, [A9WriteLMfpPreRA]>,
+ SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>;
+
+//===----------------------------------------------------------------------===//
+// Resources for other (non-LDM/VLDM) Variants.
+
+// These mov immediate writers are unconditionally expanded with
+// additive latency.
+def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
+def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
+def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
+
+// Some ALU operations can read loaded integer values one cycle early.
+def A9ReadALU : SchedReadAdvance<1,
+ [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
+ A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
+ A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
+ A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
+ A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>;
+
+// Read types for operands that are unconditionally read in cycle N
+// after the instruction issues, decreases producer latency by N-1.
+def A9Read2 : SchedReadAdvance<1>;
+def A9Read3 : SchedReadAdvance<2>;
+def A9Read4 : SchedReadAdvance<3>;
+
+//===----------------------------------------------------------------------===//
+// Map itinerary classes to scheduler read/write resources per operand.
+//
+// For ARM, we piggyback scheduler resources on the Itinerary classes
+// to avoid perturbing the existing instruction definitions.
+
+// This table follows the ARM Cortex-A9 Technical Reference Manuals,
+// mostly in order.
+
+def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
+ IIC_iMVNi,IIC_iMVNsi,
+ IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
+def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>;
+def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
+
+def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
+def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
+def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
+
+def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
+def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
+def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>;
+def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
+def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
+def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
+def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
+def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
+
+// A9WriteHi ignored for MUL32.
+def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
+ IIC_iMUL64,IIC_iMAC64]>;
+// FIXME: SMLALxx needs itin classes
+def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>;
+
+// TODO: For floating-point ops, we model the pipeline forwarding
+// latencies here. WAW latencies are sometimes longer.
+
+def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
+ IIC_fpUNA32, IIC_fpUNA64,
+ IIC_fpCMP32, IIC_fpCMP64]>;
+def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
+def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
+ IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
+ IIC_fpALU32, IIC_fpALU64]>;
+def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>;
+def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>;
+def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>;
+def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>;
+def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>;
+def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>;
+def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
+def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;
+
+def :ItinRW<[A9WriteB], [IIC_Br]>;
+
+// A9 PLD is processed in a dedicated unit.
+def :ItinRW<[], [IIC_Preload]>;
+
+// Note: We must assume that loads are aligned, since the machine
+// model cannot know this statically and A9 ignores alignment hints.
+
+// A9WriteAdr consumes AGU regardless address writeback. But it's
+// latency is only relevant for users of an updated address.
+def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r,
+ IIC_iLoad_iu,IIC_iLoad_ru]>;
+def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>;
+def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r,
+ IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>;
+def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>;
+def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r,
+ IIC_iLoad_d_ru]>;
+// Store either has no def operands, or the one def for address writeback.
+def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r,
+ IIC_iStore_iu, IIC_iStore_ru,
+ IIC_iStore_d_i, IIC_iStore_d_r,
+ IIC_iStore_d_ru]>;
+def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu,
+ IIC_iStore_bh_i, IIC_iStore_bh_r,
+ IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
+def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
+
+// A9WriteML will be expanded into a separate write for each def
+// operand. Address generation consumes resources, but A9WriteLMAdr
+// is listed after all def operands, so has no effective latency.
+//
+// Note: A9WriteLM expands into an even number of def operands. The
+// actual number of def operands may be less by one.
+def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>;
+
+// Load multiple with address writeback has an extra def operand in
+// front of the loaded registers.
+//
+// Reuse the load-multiple variants for store-multiple because the
+// resources are identical, For stores only the address writeback
+// has a def operand so the WriteL latencies are unused.
+def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
+ IIC_iStore_m,
+ IIC_iStore_mu]>;
+def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
+def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
+
+def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
+
+def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
+def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
+def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64,
+ IIC_fpStore_m, IIC_fpStore_mu]>;
+
+// Note: Unlike VLDM, VLD1 expects the writeback operand after the
+// normal writes.
+def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u,
+ IIC_VLD1x2, IIC_VLD1x2u]>;
+def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u,
+ IIC_VLD1x4, IIC_VLD1x4u,
+ IIC_VLD4dup, IIC_VLD4dupu]>;
+def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu,
+ IIC_VLD2, IIC_VLD2u,
+ IIC_VLD2dup, IIC_VLD2dupu]>;
+def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu,
+ IIC_VLD2x2, IIC_VLD2x2u,
+ IIC_VLD2ln, IIC_VLD2lnu]>;
+def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u,
+ IIC_VLD3dup, IIC_VLD3dupu]>;
+def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u,
+ IIC_VLD4ln, IIC_VLD4lnu]>;
+def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;
+
+// Vector stores use similar resources to vector loads, so use the
+// same write types. The address write must be first for stores with
+// address writeback.
+def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u,
+ IIC_VST1x2, IIC_VST1x2u,
+ IIC_VST1ln, IIC_VST1lnu,
+ IIC_VST2, IIC_VST2u,
+ IIC_VST2x2, IIC_VST2x2u,
+ IIC_VST2ln, IIC_VST2lnu]>;
+def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u,
+ IIC_VST1x4, IIC_VST1x4u,
+ IIC_VST3, IIC_VST3u,
+ IIC_VST3ln, IIC_VST3lnu,
+ IIC_VST4, IIC_VST4u,
+ IIC_VST4ln, IIC_VST4lnu]>;
+
+// NEON moves.
+def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
+def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
+def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;
+
+// NEON integer arithmetic
+//
+// VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
+def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
+// VSUB/VMVN/VCLSD/VCLZD/VCNTD
+def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
+// VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
+// ...
+// VHADD/VRHADD/VQADD/VTST/VADH/VRADH
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;
+
+// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
+def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
+// VQNEG/VQABS
+def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
+// VABS
+def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
+// VPADD/VPADDL are mapped later under IIC_SHLi.
+// ...
+// VCLSQ/VCLZQ/VCNTQ, takes two cycles.
+def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
+// VMOVimm/VMVNimm/VORRimm/VBICimm
+def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
+def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>;
+def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;
+
+// NEON integer multiply
+//
+// Note: these don't quite match the timing docs, but they do match
+// the original A9 itinerary.
+def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>;
+def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>;
+def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>;
+def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>;
+def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>;
+def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
+def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>;
+def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>;
+
+// NEON integer shift
+// TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
+def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
+def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
+
+// NEON permute
+def :ItinRW<[A9WriteV2, A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
+def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
+ [IIC_VPERMQ3, IIC_VEXTQ]>;
+def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;
+def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
+def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>;
+def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
+def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>;
+def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
+ [IIC_VTBX4]>;
+
+// NEON floating-point
+def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>;
+def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
+def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
+def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
+def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
+def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
+def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
+def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
+
+// Map SchedRWs that are identical for cortexa9 to existing resources.
+def : SchedAlias<WriteALU, A9WriteALU>;
+def : SchedAlias<WriteALUsr, A9WriteALUsr>;
+def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
+def : SchedAlias<ReadALU, A9ReadALU>;
+def : SchedAlias<ReadALUsr, A9ReadALU>;
+def : InstRW< [WriteALU],
+ (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
+ "BICrr")>;
+def : InstRW< [WriteALUsi], (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>;
+def : InstRW< [WriteALUsr], (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>;
+
+
+def : SchedAlias<WriteCMP, A9WriteALU>;
+def : SchedAlias<WriteCMPsi, A9WriteALU>;
+def : SchedAlias<WriteCMPsr, A9WriteALU>;
+
+def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi",
+ "MOVCCsr")>;
+def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>;
+def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm",
+ "MOV_ga_dyn")>;
+def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>;
+def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
+
+def : InstRW< [WriteALU], (instregex "SEL")>;
+
+def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>;
+
+def : InstRW< [A9WriteM],
+ (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS",
+ "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
+def : InstRW< [A9WriteM, A9WriteMHi],
+ (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL",
+ "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5", "SMLALBB", "SMLALBT", "SMLALTB",
+ "SMLALTT")>;
+// FIXME: These instructions used to have NoItinerary. Just copied the one from above.
+def : InstRW< [A9WriteM, A9WriteMHi],
+ (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
+ "SMLSLD", "SMLLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;
+
+def : InstRW<[A9WriteM16, A9WriteM16Hi],
+ (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
+def : InstRW<[A9WriteM16, A9WriteM16Hi],
+ (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>;
+
+def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>;
+def : InstRW<[A9WriteLsi], (instregex "LDRrs")>;
+def : InstRW<[A9WriteLb],
+ (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB",
+ "LDRH", "LDRSH", "LDRSB")>;
+def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>;
+
+def : WriteRes<WriteDiv, []> { let Latency = 0; }
+
+def : WriteRes<WriteBr, [A9UnitB]>;
+def : WriteRes<WriteBrL, [A9UnitB]>;
+def : WriteRes<WriteBrTbl, [A9UnitB]>;
+def : WriteRes<WritePreLd, []>;
+def : SchedAlias<WriteCvtFP, A9WriteF>;
+def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
+} // SchedModel = CortexA9Model
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
new file mode 100644
index 0000000..3ad7730
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -0,0 +1,1046 @@
+//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Swift processor..
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. This is
+// required until SD and PostRA schedulers are replaced by MachineScheduler.
+
+def SW_DIS0 : FuncUnit;
+def SW_DIS1 : FuncUnit;
+def SW_DIS2 : FuncUnit;
+
+def SW_ALU0 : FuncUnit;
+def SW_ALU1 : FuncUnit;
+def SW_LS : FuncUnit;
+def SW_IDIV : FuncUnit;
+def SW_FDIV : FuncUnit;
+
+// FIXME: Need bypasses.
+// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
+// IIC_iMOVix2ld better.
+// FIXME: Model the special immediate shifts that are not microcoded.
+// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
+// to issue on pipe 1?
+// FIXME: Model the pipelined behavior of CMP / TST instructions.
+// FIXME: Better model the microcode stages of multiply instructions, especially
+// conditional variants.
+// FIXME: Add preload instruction when it is documented.
+// FIXME: Model non-pipelined nature of FP div / sqrt unit.
+
+// Swift machine model for scheduling and other instruction cost heuristics.
+def SwiftModel : SchedMachineModel {
+ let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
+ let MicroOpBufferSize = 45; // Based on NEON renamed registers.
+ let LoadLatency = 3;
+ let MispredictPenalty = 14; // A branch direction mispredict.
+ let CompleteModel = 0; // FIXME: Remove if all instructions are covered.
+}
+
+// Swift predicates.
+def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
+
+// Swift resource mapping.
+let SchedModel = SwiftModel in {
+ // Processor resources.
+ def SwiftUnitP01 : ProcResource<2>; // ALU unit.
+ def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
+ def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
+ def SwiftUnitP2 : ProcResource<1>; // LS unit.
+ def SwiftUnitDiv : ProcResource<1>;
+
+ // Generic resource requirements.
+ def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>;
+ def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 2; }
+ def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 4; }
+ def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 6; }
+ def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
+ let Latency = 4;
+ }
+ def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
+ let Latency = 6;
+ }
+ def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>;
+ def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 2; }
+ def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 4; }
+ def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 6; }
+ def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 8; }
+ def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]> { let Latency = 12; }
+ def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>;
+ def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>;
+ def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
+ def SwiftWriteP01ThreeCycleTwoUops : SchedWriteRes<[SwiftUnitP01,
+ SwiftUnitP01]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ }
+ def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+ }
+ // Plain load without writeback.
+ def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 3;
+ }
+ def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 4;
+ }
+ // A store does not write to a register.
+ def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 0;
+ }
+ foreach Num = 1-4 in {
+ def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>;
+ }
+ def SwiftWriteP01OneCycle2x_load : WriteSequence<[SwiftWriteP01OneCycle,
+ SwiftWriteP01OneCycle,
+ SwiftWriteP2ThreeCycle]>;
+ // 4.2.4 Arithmetic and Logical.
+ // ALU operation register shifted by immediate variant.
+ def SwiftWriteALUsi : SchedWriteVariant<[
+ // lsl #2, lsl #1, or lsr #1.
+ SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+ ]>;
+ def SwiftWriteALUsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+ def SwiftWriteALUSsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+ def SwiftReadAdvanceALUsr : SchedReadVariant<[
+ SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
+ SchedVar<NoSchedPred, [NoReadAdvance]>
+ ]>;
+ // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
+ // AND,BIC,EOR,ORN,ORR
+ // CLZ,RBIT,REV,REV16,REVSH,PKH
+ def : WriteRes<WriteALU, [SwiftUnitP01]>;
+ def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
+ def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
+ def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
+ def : ReadAdvance<ReadALU, 0>;
+ def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
+
+
+ def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
+ SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+
+ // 4.2.5 Integer comparison
+ def : WriteRes<WriteCMP, [SwiftUnitP01]>;
+ def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>;
+ def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>;
+
+ // 4.2.6 Shift, Move
+ // Shift
+ // ASR,LSL,ROR,RRX
+ // MOV(register-shiftedregister) MVN(register-shiftedregister)
+ // Move
+ // MOV,MVN
+ // MOVT
+ // Sign/Zero extension
+ def : InstRW<[SwiftWriteP01OneCycle],
+ (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
+ "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH",
+ "t2UXTB16")>;
+ // Pseudo instructions.
+ def : InstRW<[SwiftWriteP01OneCycle2x],
+ (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm",
+ "t2MOVi32imm", "t2MOV_ga_dyn")>;
+ def : InstRW<[SwiftWriteP01OneCycle3x],
+ (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>;
+ def : InstRW<[SwiftWriteP01OneCycle2x_load],
+ (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
+
+ def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
+
+ def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>,
+ SchedVar<NoSchedPred, [ SwiftWriteP0OneCycle ]>
+ ]>;
+
+ // 4.2.7 Select
+ // SEL
+ def : InstRW<[SwiftPredP0OneOrTwoCycle], (instregex "SEL", "t2SEL")>;
+
+ // 4.2.8 Bitfield
+ // BFI,BFC, SBFX,UBFX
+ def : InstRW< [SwiftWriteP01TwoCycle],
+ (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
+ "(t|t2)UBFX", "(t|t2)SBFX")>;
+
+ // 4.2.9 Saturating arithmetic
+ def : InstRW< [SwiftWriteP01TwoCycle],
+ (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
+ "USAT16", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
+ "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
+ "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
+ "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
+ "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
+
+ // 4.2.10 Parallel Arithmetic
+ // Not flag setting.
+ def : InstRW< [SwiftWriteALUsr],
+ (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
+ "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
+ "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
+ "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
+ // Flag setting.
+ def : InstRW< [SwiftWriteP01TwoCycle],
+ (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
+ "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
+ "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
+ "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
+ "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
+ "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
+
+ // 4.2.11 Sum of Absolute Difference
+ def : InstRW< [SwiftWriteP0P1FourCycle], (instregex "USAD8") >;
+ def : InstRW<[SwiftWriteP0P1FourCycle, ReadALU, ReadALU, SchedReadAdvance<2>],
+ (instregex "USADA8")>;
+
+ // 4.2.12 Integer Multiply (32-bit result)
+ // Two sources.
+ def : InstRW< [SwiftWriteP0FourCycle],
+ (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
+ "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
+ "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
+ "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
+
+ def SwiftWriteP0P01FiveCycleTwoUops :
+ SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
+ let Latency = 5;
+ }
+
+ def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [ SwiftWriteP0P01FiveCycleTwoUops ]>,
+ SchedVar<NoSchedPred, [ SwiftWriteP0FourCycle ]>
+ ]>;
+
+ def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[
+ SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>,
+ SchedVar<NoSchedPred, [ReadALU]>
+ ]>;
+
+ // Multiply accumulate, three sources
+ def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
+ SwiftReadAdvanceFourCyclesPred],
+ (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
+ "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
+ "t2SMMLSR")>;
+
+ // 4.2.13 Integer Multiply (32-bit result, Q flag)
+ def : InstRW< [SwiftWriteP0FourCycle],
+ (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>;
+ def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
+ SwiftReadAdvanceFourCyclesPred],
+ (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
+ "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
+ "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>;
+ def : InstRW< [SwiftPredP0P01FourFiveCycle],
+ (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>;
+
+ def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+ }
+ def SwiftWrite1Cycle : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+ }
+ def SwiftWrite5Cycle : SchedWriteRes<[]> {
+ let Latency = 5;
+ let NumMicroOps = 0;
+ }
+ def SwiftWrite6Cycle : SchedWriteRes<[]> {
+ let Latency = 6;
+ let NumMicroOps = 0;
+ }
+
+ // 4.2.14 Integer Multiply, Long
+ def : InstRW< [SwiftP0P0P01FiveCycle, SwiftWrite5Cycle],
+ (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>;
+
+ def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [2, 3];
+ }
+
+ // 4.2.15 Integer Multiply Accumulate, Long
+ // 4.2.16 Integer Multiply Accumulate, Dual
+ // 4.2.17 Integer Multiply Accumulate Accumulate, Long
+ // We are being a bit inaccurate here.
+ def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU,
+ SchedReadAdvance<4>, SchedReadAdvance<3>],
+ (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "MLALBB", "SMLALBT",
+ "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
+ "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2MLALBB", "t2SMLALBT",
+ "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX",
+ "t2UMAAL")>;
+
+ def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
+ let NumMicroOps = 1;
+ let Latency = 14;
+ let ResourceCycles = [1, 14];
+ }
+ // 4.2.18 Integer Divide
+ def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
+ def : InstRW <[SwiftDiv],
+ (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
+
+ // 4.2.19 Integer Load Single Element
+ // 4.2.20 Integer Load Signextended
+ def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ }
+ def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ }
+ def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01,
+ SwiftUnitP01]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ }
+ def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ }
+ def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2,
+ SwiftUnitP01]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ }
+ def SwiftWrBackOne : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+ }
+ def SwiftWriteLdFour : SchedWriteRes<[]> {
+ let Latency = 4;
+ let NumMicroOps = 0;
+ }
+ // Not accurate.
+ def : InstRW<[SwiftWriteP2ThreeCycle],
+ (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
+ "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "tLDR[BH](r|i|spi|pci|pciASM)",
+ "tLDR(r|i|spi|pci|pciASM)")>;
+ def : InstRW<[SwiftWriteP2ThreeCycle],
+ (instregex "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
+ def : InstRW<[SwiftWriteP2P01FourCyle],
+ (instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
+ "t2LDRpci_pic", "tLDRS(B|H)")>;
+ def : InstRW<[SwiftWriteP2P01ThreeCycle, SwiftWrBackOne],
+ (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
+ "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
+ "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>;
+ def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne],
+ (instregex "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
+ "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>;
+
+ // 4.2.21 Integer Dual Load
+ // Not accurate.
+ def : InstRW<[SwiftWriteP2P2ThreeCycle, SwiftWriteLdFour],
+ (instregex "t2LDRDi8", "LDRD$")>;
+ def : InstRW<[SwiftWriteP2P2P01ThreeCycle, SwiftWriteLdFour, SwiftWrBackOne],
+ (instregex "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
+
+ // 4.2.22 Integer Load, Multiple
+ // NumReg = 1 .. 16
+ foreach Lat = 3-25 in {
+ def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = Lat;
+ }
+ def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> {
+ let Latency = Lat;
+ let NumMicroOps = 0;
+ }
+ }
+ // Predicate.
+ foreach NumAddr = 1-16 in {
+ def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
+ }
+ def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; }
+ def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>;
+ def SwiftWriteLM : SchedWriteVariant<[
+ SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy]>,
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy]>,
+ SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy]>,
+ SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy]>,
+ SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy]>,
+ SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy]>,
+ SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy]>,
+ SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy]>,
+ SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy]>,
+ SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy]>,
+ SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy]>,
+ SchedVar<SwiftLMAddr13Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy]>,
+ SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy, SwiftWriteLM16Cy]>,
+ SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy, SwiftWriteLM16Cy,
+ SwiftWriteLM17Cy]>,
+ SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy, SwiftWriteLM16Cy,
+ SwiftWriteLM17Cy, SwiftWriteLM18Cy]>,
+ // Unknow number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5CyNo, SwiftWriteLM6CyNo,
+ SwiftWriteLM7CyNo, SwiftWriteLM8CyNo,
+ SwiftWriteLM9CyNo, SwiftWriteLM10CyNo,
+ SwiftWriteLM11CyNo, SwiftWriteLM12CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM15CyNo, SwiftWriteLM16CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo]>
+
+ ]> { let Variadic=1; }
+
+ def : InstRW<[SwiftWriteLM, SwiftWriteLDMAddrNoWB],
+ (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
+ "(t|sys)LDM(IA|DA|DB|IB)$")>;
+ def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM],
+ (instregex /*"t2LDMIA_RET", "tLDMIA_RET", "LDMIA_RET",*/
+ "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
+ def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle],
+ (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
+ // 4.2.23 Integer Store, Single Element
+ def : InstRW<[SwiftWriteP2],
+ (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
+ "t2STR(i12|i8|s)$", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
+
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2],
+ (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
+ "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
+ "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
+ "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
+
+ // 4.2.24 Integer Store, Dual
+ def : InstRW<[SwiftWriteP2, SwiftWriteP2, SwiftWriteP01OneCycle],
+ (instregex "STRD$", "t2STRDi8")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2, SwiftWriteP2,
+ SwiftWriteP01OneCycle],
+ (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
+
+ // 4.2.25 Integer Store, Multiple
+ def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+ }
+ foreach NumAddr = 1-16 in {
+ def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
+ }
+ def SwiftWriteSTM : SchedWriteVariant<[
+ SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM2]>,
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM3]>,
+ SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM4]>,
+ SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM5]>,
+ SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM6]>,
+ SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM7]>,
+ SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM8]>,
+ SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM9]>,
+ SchedVar<SwiftLMAddr10Pred,[SwiftWriteSTM10]>,
+ SchedVar<SwiftLMAddr11Pred,[SwiftWriteSTM11]>,
+ SchedVar<SwiftLMAddr12Pred,[SwiftWriteSTM12]>,
+ SchedVar<SwiftLMAddr13Pred,[SwiftWriteSTM13]>,
+ SchedVar<SwiftLMAddr14Pred,[SwiftWriteSTM14]>,
+ SchedVar<SwiftLMAddr15Pred,[SwiftWriteSTM15]>,
+ SchedVar<SwiftLMAddr16Pred,[SwiftWriteSTM16]>,
+ // Unknow number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteSTM2]>
+ ]>;
+ def : InstRW<[SwiftWriteSTM],
+ (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM],
+ (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
+ "PUSH", "tPUSH")>;
+
+ // LDRLIT pseudo instructions, they expand to LDR + PICADD
+ def : InstRW<[SwiftWriteP2ThreeCycle, WriteALU],
+ (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
+ // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
+ def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2ThreeCycle],
+ (instregex "LDRLIT_ga_pcrel_ldr")>;
+
+ // 4.2.26 Branch
+ def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
+ def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
+ def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> { let Latency = 0; }
+
+ // 4.2.27 Not issued
+ def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
+ def : InstRW<[WriteNoop], (instregex "t2IT", "IT", "NOP")>;
+
+ // 4.2.28 Advanced SIMD, Integer, 2 cycle
+ def : InstRW<[SwiftWriteP0TwoCycle],
+ (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
+ "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
+ "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
+ "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU", "VBIF",
+ "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
+
+ def : InstRW<[SwiftWriteP1TwoCycle],
+ (instregex "VEXT", "VREV16", "VREV32", "VREV64")>;
+
+ // 4.2.29 Advanced SIMD, Integer, 4 cycle
+ // 4.2.30 Advanced SIMD, Integer with Accumulate
+ def : InstRW<[SwiftWriteP0FourCycle],
+ (instregex "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA", "VACGE", "VACGT",
+ "VACLE", "VACLT", "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL",
+ "VQRSHL", "VRSHR(u|s)", "VABS(f|v)", "VQABS", "VQNEG", "VQADD",
+ "VQSUB")>;
+ def : InstRW<[SwiftWriteP1FourCycle],
+ (instregex "VRECPE", "VRSQRTE")>;
+
+ // 4.2.31 Advanced SIMD, Add and Shift with Narrow
+ def : InstRW<[SwiftWriteP0P1FourCycle],
+ (instregex "VADDHN", "VSUBHN", "VSHRN")>;
+ def : InstRW<[SwiftWriteP0P1SixCycle],
+ (instregex "VRADDHN", "VRSUBHN", "VRSHRN", "VQSHRN", "VQSHRUN",
+ "VQRSHRN", "VQRSHRUN")>;
+
+ // 4.2.32 Advanced SIMD, Vector Table Lookup
+ foreach Num = 1-4 in {
+ def SwiftWrite#Num#xP1TwoCycle : WriteSequence<[SwiftWriteP1TwoCycle], Num>;
+ }
+ def : InstRW<[SwiftWrite1xP1TwoCycle],
+ (instregex "VTB(L|X)1")>;
+ def : InstRW<[SwiftWrite2xP1TwoCycle],
+ (instregex "VTB(L|X)2")>;
+ def : InstRW<[SwiftWrite3xP1TwoCycle],
+ (instregex "VTB(L|X)3")>;
+ def : InstRW<[SwiftWrite4xP1TwoCycle],
+ (instregex "VTB(L|X)4")>;
+
+ // 4.2.33 Advanced SIMD, Transpose
+ def : InstRW<[SwiftWriteP1FourCycle, SwiftWriteP1FourCycle,
+ SwiftWriteP1TwoCycle/*RsrcOnly*/, SchedReadAdvance<2>],
+ (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
+
+ // 4.2.34 Advanced SIMD and VFP, Floating Point
+ def : InstRW<[SwiftWriteP0TwoCycle], (instregex "VABS(S|D)$", "VNEG(S|D)$")>;
+ def : InstRW<[SwiftWriteP0FourCycle],
+ (instregex "VCMP(D|S|ZD|ZS)$", "VCMPE(D|S|ZD|ZS)")>;
+ def : InstRW<[SwiftWriteP0FourCycle],
+ (instregex "VADD(S|f)", "VSUB(S|f)", "VABD", "VPADDf", "VMAX", "VMIN", "VPMAX",
+ "VPMIN")>;
+ def : InstRW<[SwiftWriteP0SixCycle], (instregex "VADDD$", "VSUBD$")>;
+ def : InstRW<[SwiftWriteP1EightCycle], (instregex "VRECPS", "VRSQRTS")>;
+
+ // 4.2.35 Advanced SIMD and VFP, Multiply
+ def : InstRW<[SwiftWriteP1FourCycle],
+ (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
+ "VMULL", "VQDMULL")>;
+ def : InstRW<[SwiftWriteP1SixCycle],
+ (instregex "VMULD", "VNMULD")>;
+ def : InstRW<[SwiftWriteP1FourCycle],
+ (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
+ "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
+ def : InstRW<[SwiftWriteP1EightCycle], (instregex "VFMAfd", "VFMSfd")>;
+ def : InstRW<[SwiftWriteP1TwelveCyc], (instregex "VFMAfq", "VFMSfq")>;
+
+ // 4.2.36 Advanced SIMD and VFP, Convert
+ def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
+ // Fixpoint conversions.
+ def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
+
+ // 4.2.37 Advanced SIMD and VFP, Move
+ def : InstRW<[SwiftWriteP0TwoCycle],
+ (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc",
+ "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc",
+ "FCONST(D|S)")>;
+ def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VMOVN", "VMOVL")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>],
+ (instregex "VQMOVN")>;
+ def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN", "VDUPf")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>],
+ (instregex "VDUP(8|16|32)")>;
+ def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "VMOVRS$")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP0TwoCycle]>],
+ (instregex "VMOVSR$", "VSETLN")>;
+ def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2FourCycle],
+ (instregex "VMOVRR(D|S)$")>;
+ def : InstRW<[SwiftWriteP2FourCycle], (instregex "VMOVDRR$")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>,
+ WriteSequence<[SwiftWrite1Cycle, SwiftWriteP2FourCycle,
+ SwiftWriteP1TwoCycle]>],
+ (instregex "VMOVSRR$")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle]>],
+ (instregex "VGETLN(u|i)")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle,
+ SwiftWriteP01OneCycle]>],
+ (instregex "VGETLNs")>;
+
+ // 4.2.38 Advanced SIMD and VFP, Move FPSCR
+ // Serializing instructions.
+ def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> {
+ let Latency = 15;
+ let ResourceCycles = [15];
+ }
+ def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> {
+ let Latency = 15;
+ let ResourceCycles = [15];
+ }
+ def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 15;
+ let ResourceCycles = [15];
+ }
+ def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
+ (instregex "VMRS")>;
+ def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
+ (instregex "VMSR")>;
+ // Not serializing.
+ def : InstRW<[SwiftWriteP0TwoCycle], (instregex "FMSTAT")>;
+
+ // 4.2.39 Advanced SIMD and VFP, Load Single Element
+ def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDRD$", "VLDRS$")>;
+
+ // 4.2.40 Advanced SIMD and VFP, Store Single Element
+ def : InstRW<[SwiftWriteLM4Cy], (instregex "VSTRD$", "VSTRS$")>;
+
+ // 4.2.41 Advanced SIMD and VFP, Load Multiple
+ // 4.2.42 Advanced SIMD and VFP, Store Multiple
+
+ // Resource requirement for permuting, just reserves the resources.
+ foreach Num = 1-28 in {
+ def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> {
+ let Latency = 0;
+ let NumMicroOps = Num;
+ let ResourceCycles = [Num];
+ }
+ }
+
+ // Pre RA pseudos - load/store to a Q register as a D register pair.
+ def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDMQIA$", "VSTMQIA$")>;
+
+ // Post RA not modelled accurately. We assume that register use of width 64
+ // bit maps to a D register, 128 maps to a Q register. Not all different kinds
+ // are accurately represented.
+ def SwiftWriteVLDM : SchedWriteVariant<[
+ // Load of one S register.
+ SchedVar<SwiftLMAddr1Pred, [SwiftWriteLM4Cy]>,
+ // Load of one D register.
+ SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo]>,
+ // Load of 3 S register.
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm3]>,
+ // Load of a Q register (not necessarily true). We should not be mapping to
+ // 4 S registers, either.
+ SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo,
+ SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>,
+ // Load of 5 S registers.
+ SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm5]>,
+ // Load of 3 D registers. (Must also be able to handle s register list -
+ // though, not accurate)
+ SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM10Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
+ // Load of 7 S registers.
+ SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm7]>,
+ // Load of two Q registers.
+ SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm2]>,
+ // Load of 9 S registers.
+ SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm9]>,
+ // Load of 5 D registers.
+ SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM10Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
+ // Inaccurate: reuse describtion from 9 S registers.
+ SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm9]>,
+ // Load of three Q registers.
+ SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM11Cy,
+ SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
+ SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
+ SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
+ SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm3]>,
+ // Inaccurate: reuse describtion from 9 S registers.
+ SchedVar<SwiftLMAddr13Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm9]>,
+ // Load of 7 D registers inaccurate.
+ SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM10Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM14Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm7]>,
+ SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM17Cy, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm9]>,
+ // Load of 4 Q registers.
+ SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM7Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy, SwiftWriteLM18CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm4]>,
+ // Unknow number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm2]>
+ ]> { let Variadic = 1; }
+
+ def : InstRW<[SwiftWriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
+
+ def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVLDM],
+ (instregex "VLDM[SD](IA|DB)_UPD$")>;
+
+ def SwiftWriteVSTM : SchedWriteVariant<[
+ // One S register.
+ SchedVar<SwiftLMAddr1Pred, [SwiftWriteSTM1]>,
+ // One D register.
+ SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM1]>,
+ // Three S registers.
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM4]>,
+ // Assume one Q register.
+ SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM1]>,
+ SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM6]>,
+ // Assume three D registers.
+ SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM4]>,
+ SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM8]>,
+ // Assume two Q registers.
+ SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM3]>,
+ SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM10]>,
+ // Assume 5 D registers.
+ SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>,
+ SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>,
+ // Assume three Q registers.
+ SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>,
+ SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>,
+ // Assume 7 D registers.
+ SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM8]>,
+ SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM16]>,
+ // Assume four Q registers.
+ SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM5]>,
+ // Asumme two Q registers.
+ SchedVar<NoSchedPred, [SwiftWriteSTM3]>
+ ]> { let Variadic = 1; }
+
+ def : InstRW<[SwiftWriteVSTM], (instregex "VSTM[SD](IA|DB)$")>;
+
+ def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVSTM],
+ (instregex "VSTM[SD](IA|DB)_UPD")>;
+
+ // 4.2.43 Advanced SIMD, Element or Structure Load and Store
+ def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+ }
+ def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 4;
+ let ResourceCycles = [3];
+ }
+ foreach Num = 1-2 in {
+ def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> {
+ let Latency = 0;
+ let NumMicroOps = Num;
+ let ResourceCycles = [Num];
+ }
+ }
+ // VLDx
+ // Multiple structures.
+ // Single element structure loads.
+ // We assume aligned.
+ // Single/two register.
+ def : InstRW<[SwiftWriteLM4Cy], (instregex "VLD1(d|q)(8|16|32|64)$")>;
+ def : InstRW<[SwiftWriteLM4Cy, SwiftWriteP01OneCycle],
+ (instregex "VLD1(d|q)(8|16|32|64)wb")>;
+ // Three register.
+ def : InstRW<[SwiftWrite3xP2FourCy],
+ (instregex "VLD1(d|q)(8|16|32|64)T$", "VLD1d64TPseudo")>;
+ def : InstRW<[SwiftWrite3xP2FourCy, SwiftWriteP01OneCycle],
+ (instregex "VLD1(d|q)(8|16|32|64)Twb")>;
+ /// Four Register.
+ def : InstRW<[SwiftWrite2xP2FourCy],
+ (instregex "VLD1(d|q)(8|16|32|64)Q$", "VLD1d64QPseudo")>;
+ def : InstRW<[SwiftWrite2xP2FourCy, SwiftWriteP01OneCycle],
+ (instregex "VLD1(d|q)(8|16|32|64)Qwb")>;
+ // Two element structure loads.
+ // Two/four register.
+ def : InstRW<[SwiftWriteLM9Cy, SwiftExt2xP0, SwiftVLDMPerm2],
+ (instregex "VLD2(d|q|b)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
+ SwiftVLDMPerm2],
+ (instregex "VLD2(d|q|b)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
+ // Three element structure.
+ def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
+ SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
+ (instregex "VLD3(d|q)(8|16|32)$")>;
+ def : InstRW<[SwiftWriteLM9Cy, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
+ (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
+
+ def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
+ (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
+ def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm3,
+ SwiftWrite3xP2FourCy],
+ (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+ // Four element structure loads.
+ def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
+ SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
+ SwiftWrite3xP2FourCy],
+ (instregex "VLD4(d|q)(8|16|32)$")>;
+ def : InstRW<[SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
+ SwiftWrite3xP2FourCy],
+ (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
+ def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
+ SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
+ SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
+ (instregex "VLD4(d|q)(8|16|32)_UPD")>;
+ def : InstRW<[SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
+ SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
+ (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+ // Single all/lane loads.
+ // One element structure.
+ def : InstRW<[SwiftWriteLM6Cy, SwiftVLDMPerm2],
+ (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm2],
+ (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
+ "VLD1LNq(8|16|32)Pseudo_UPD")>;
+ // Two element structure.
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftExt1xP0, SwiftVLDMPerm2],
+ (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
+ "VLD2LN(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftWriteP01OneCycle,
+ SwiftExt1xP0, SwiftVLDMPerm2],
+ (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
+ SwiftExt1xP0, SwiftVLDMPerm2],
+ (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb")>;
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
+ SwiftExt1xP0, SwiftVLDMPerm2],
+ (instregex "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
+ // Three element structure.
+ def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, SwiftExt1xP0,
+ SwiftVLDMPerm3],
+ (instregex "VLD3(DUP|LN)(d|q)(8|16|32)$",
+ "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy,
+ SwiftWriteP01OneCycle, SwiftExt1xP0, SwiftVLDMPerm3],
+ (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
+ def : InstRW<[SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy,
+ SwiftWriteLM8Cy, SwiftExt1xP0, SwiftVLDMPerm3],
+ (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
+ // Four element struture.
+ def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
+ SwiftWriteLM10CyNo, SwiftExt1xP0, SwiftVLDMPerm5],
+ (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$",
+ "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
+ SwiftWriteLM10CyNo, SwiftWriteP01OneCycle, SwiftExt1xP0,
+ SwiftVLDMPerm5],
+ (instregex "VLD4(DUP|LN)(d|q)(8|16|32)_UPD")>;
+ def : InstRW<[SwiftWriteLM8Cy, SwiftWriteP01OneCycle, SwiftWriteLM9Cy,
+ SwiftWriteLM10CyNo, SwiftWriteLM10CyNo, SwiftExt1xP0,
+ SwiftVLDMPerm5],
+ (instregex "VLD4(DUP|LN)(d|q)(8|16|32)Pseudo_UPD")>;
+ // VSTx
+ // Multiple structures.
+ // Single element structure store.
+ def : InstRW<[SwiftWrite1xP2], (instregex "VST1d(8|16|32|64)$")>;
+ def : InstRW<[SwiftWrite2xP2], (instregex "VST1q(8|16|32|64)$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2],
+ (instregex "VST1d(8|16|32|64)wb")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2],
+ (instregex "VST1q(8|16|32|64)wb")>;
+ def : InstRW<[SwiftWrite3xP2],
+ (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite3xP2],
+ (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
+ def : InstRW<[SwiftWrite4xP2],
+ (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2],
+ (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
+ // Two element structure store.
+ def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
+ (instregex "VST2(d|b)(8|16|32)$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
+ (instregex "VST2(b|d)(8|16|32)wb")>;
+ def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
+ (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
+ (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
+ // Three element structure store.
+ def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST3(d|q)(8|16|32)_UPD",
+ "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+ // Four element structure store.
+ def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm4],
+ (instregex "VST4(d|q)(8|16|32)_UPD",
+ "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+ // Single/all lane store.
+ // One element structure.
+ def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
+ (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
+ (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
+ // Two element structure.
+ def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm2],
+ (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm2],
+ (instregex "VST2LN(d|q)(8|16|32)_UPD",
+ "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
+ // Three element structure.
+ def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST3LN(d|q)(8|16|32)_UPD",
+ "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
+ // Four element structure.
+ def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
+ (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
+ (instregex "VST4LN(d|q)(8|16|32)_UPD",
+ "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
+
+ // 4.2.44 VFP, Divide and Square Root
+ def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
+ let NumMicroOps = 1;
+ let Latency = 17;
+ let ResourceCycles = [1, 15];
+ }
+ def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
+ let NumMicroOps = 1;
+ let Latency = 32;
+ let ResourceCycles = [1, 30];
+ }
+ def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
+ def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
+
+ // Not specified.
+ def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
+ // Preload.
+ def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0;
+ let ResourceCycles = [0];
+ }
+
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
new file mode 100644
index 0000000..57d0bfb
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
@@ -0,0 +1,300 @@
+//===-- ARMScheduleV6.td - ARM v6 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM v6 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Model based on ARM1176
+//
+// Functional Units
+def V6_Pipe : FuncUnit; // pipeline
+
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual"
+//
+def ARMV6Itineraries : ProcessorItineraries<
+ [V6_Pipe], [], [
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [V6_Pipe]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [5]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iCMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [4]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_si, [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu, [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru, [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoad_siu, [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+
+ //
+ // Load multiple, def is the 5th operand.
+ InstrItinData<IIC_iLoad_m , [InstrStage<3, [V6_Pipe]>], [1, 1, 1, 1, 4]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 1, 4]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 1, 1, 4]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3, 1]>,
+
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [V6_Pipe]>], [1, 1, 4]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 4]>,
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_bh_i, [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_d_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r, [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_si, [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_iu, [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<3, [V6_Pipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [V6_Pipe]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [V6_Pipe]>], [3]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [V6_Pipe]>], [10, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [V6_Pipe]>], [10, 10, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
+ //
+ // FP Load Multiple
+ InstrItinData<IIC_fpLoad_m , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 5]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu, [InstrStage<3, [V6_Pipe]>], [3, 2, 1, 1, 5]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ //
+ // Double-precision FP Store
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m, [InstrStage<3, [V6_Pipe]>], [2, 2, 2, 2]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<3, [V6_Pipe]>], [3, 2, 2, 2, 2]>
+]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
new file mode 100644
index 0000000..6fded9c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -0,0 +1,271 @@
+//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DerivedTypes.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-selectiondag-info"
+
+// Emit, if possible, a specialized version of the given Libcall. Typically this
+// means selecting the appropriately aligned version, but we also convert memset
+// of 0 into memclr.
+SDValue ARMSelectionDAGInfo::
+EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ RTLIB::Libcall LC) const {
+ const ARMSubtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
+ const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
+
+ // Only use a specialized AEABI function if the default version of this
+ // Libcall is an AEABI function.
+ if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
+ return SDValue();
+
+ // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
+ // able to translate memset to memclr and use the value to index the function
+ // name array.
+ enum {
+ AEABI_MEMCPY = 0,
+ AEABI_MEMMOVE,
+ AEABI_MEMSET,
+ AEABI_MEMCLR
+ } AEABILibcall;
+ switch (LC) {
+ case RTLIB::MEMCPY:
+ AEABILibcall = AEABI_MEMCPY;
+ break;
+ case RTLIB::MEMMOVE:
+ AEABILibcall = AEABI_MEMMOVE;
+ break;
+ case RTLIB::MEMSET:
+ AEABILibcall = AEABI_MEMSET;
+ if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
+ if (ConstantSrc->getZExtValue() == 0)
+ AEABILibcall = AEABI_MEMCLR;
+ break;
+ default:
+ return SDValue();
+ }
+
+ // Choose the most-aligned libcall variant that we can
+ enum {
+ ALIGN1 = 0,
+ ALIGN4,
+ ALIGN8
+ } AlignVariant;
+ if ((Align & 7) == 0)
+ AlignVariant = ALIGN8;
+ else if ((Align & 3) == 0)
+ AlignVariant = ALIGN4;
+ else
+ AlignVariant = ALIGN1;
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+ if (AEABILibcall == AEABI_MEMCLR) {
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ } else if (AEABILibcall == AEABI_MEMSET) {
+ // Adjust parameters for memset, EABI uses format (ptr, size, value),
+ // GNU library uses (ptr, value, size)
+ // See RTABI section 4.3.4
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else if (Src.getValueType().bitsLT(MVT::i32))
+ Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+ Entry.isSExt = false;
+ Args.push_back(Entry);
+ } else {
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ }
+
+ char const *FunctionNames[4][3] = {
+ { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
+ { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
+ { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
+ { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
+ };
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(
+ TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
+ TLI->getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0)
+ .setDiscardResult();
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+
+ return CallResult.second;
+}
+
+SDValue
+ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ const ARMSubtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+ RTLIB::MEMCPY);
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+ RTLIB::MEMCPY);
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
+ const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
+ SDValue TFOps[6];
+ SDValue Loads[6];
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
+ // VLDM/VSTM and make this code emit it when appropriate. This would reduce
+ // pressure on the general purpose registers. However this seems harder to map
+ // onto the register allocator's view of the world.
+
+ // The number of MEMCPY pseudo-instructions to emit. We use up to
+ // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
+ // later on. This is a lower bound on the number of MEMCPY operations we must
+ // emit.
+ unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
+
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
+
+ for (unsigned I = 0; I != NumMEMCPYs; ++I) {
+ // Evenly distribute registers among MEMCPY operations to reduce register
+ // pressure.
+ unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
+ unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
+
+ Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
+ DAG.getConstant(NumRegs, dl, MVT::i32));
+ Src = Dst.getValue(1);
+ Chain = Dst.getValue(2);
+
+ DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
+ SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
+
+ EmittedNumMemOps = NextEmittedNumMemOps;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, dl, MVT::i32)),
+ SrcPtrInfo.getWithOffset(SrcOff),
+ false, false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(TFOps, i));
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, dl, MVT::i32)),
+ DstPtrInfo.getWithOffset(DstOff), false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(TFOps, i));
+}
+
+
+SDValue ARMSelectionDAGInfo::
+EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+ RTLIB::MEMMOVE);
+}
+
+
+SDValue ARMSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+ RTLIB::MEMSET);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
new file mode 100644
index 0000000..289879e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -0,0 +1,73 @@
+//===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H
+
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+namespace ARM_AM {
+ static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) {
+ switch (Opcode) {
+ default: return ARM_AM::no_shift;
+ case ISD::SHL: return ARM_AM::lsl;
+ case ISD::SRL: return ARM_AM::lsr;
+ case ISD::SRA: return ARM_AM::asr;
+ case ISD::ROTR: return ARM_AM::ror;
+ //case ISD::ROTL: // Only if imm -> turn into ROTR.
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+} // end namespace ARM_AM
+
+class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
+
+ SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
+
+ // Adjust parameters for memset, see RTABI section 4.3.4
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const override;
+
+ SDValue EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ RTLIB::Libcall LC) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
new file mode 100644
index 0000000..bb6ae28
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -0,0 +1,366 @@
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "ARMFrameLowering.h"
+#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSelectionDAGInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "ARMGenSubtargetInfo.inc"
+
+static cl::opt<bool>
+UseFusedMulOps("arm-use-mulops",
+ cl::init(true), cl::Hidden);
+
+enum ITMode {
+ DefaultIT,
+ RestrictedIT,
+ NoRestrictedIT
+};
+
+static cl::opt<ITMode>
+IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
+ cl::ZeroOrMore,
+ cl::values(clEnumValN(DefaultIT, "arm-default-it",
+ "Generate IT block based on arch"),
+ clEnumValN(RestrictedIT, "arm-restrict-it",
+ "Disallow deprecated IT based on ARMv8"),
+ clEnumValN(NoRestrictedIT, "arm-no-restrict-it",
+ "Allow IT blocks based on ARMv7"),
+ clEnumValEnd));
+
+/// ForceFastISel - Use the fast-isel, even for subtargets where it is not
+/// currently supported (for testing only).
+static cl::opt<bool>
+ForceFastISel("arm-force-fast-isel",
+ cl::init(false), cl::Hidden);
+
+/// initializeSubtargetDependencies - Initializes using a CPU and feature string
+/// so that we can use initializer lists for subtarget initialization.
+ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
+ initializeEnvironment();
+ initSubtargetFeatures(CPU, FS);
+ return *this;
+}
+
+ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
+ StringRef FS) {
+ ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS);
+ if (STI.isThumb1Only())
+ return (ARMFrameLowering *)new Thumb1FrameLowering(STI);
+
+ return new ARMFrameLowering(STI);
+}
+
+ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
+ const std::string &FS,
+ const ARMBaseTargetMachine &TM, bool IsLittle)
+ : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
+ ARMProcClass(None), ARMArch(ARMv4t), stackAlignment(4), CPUString(CPU),
+ IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
+ FrameLowering(initializeFrameLowering(CPU, FS)),
+ // At this point initializeSubtargetDependencies has been called so
+ // we can query directly.
+ InstrInfo(isThumb1Only()
+ ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
+ : !isThumb()
+ ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
+ : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
+ TLInfo(TM, *this) {}
+
+void ARMSubtarget::initializeEnvironment() {
+ HasV4TOps = false;
+ HasV5TOps = false;
+ HasV5TEOps = false;
+ HasV6Ops = false;
+ HasV6MOps = false;
+ HasV6KOps = false;
+ HasV6T2Ops = false;
+ HasV7Ops = false;
+ HasV8Ops = false;
+ HasV8_1aOps = false;
+ HasV8_2aOps = false;
+ HasVFPv2 = false;
+ HasVFPv3 = false;
+ HasVFPv4 = false;
+ HasFPARMv8 = false;
+ HasNEON = false;
+ UseNEONForSinglePrecisionFP = false;
+ UseMulOps = UseFusedMulOps;
+ SlowFPVMLx = false;
+ HasVMLxForwarding = false;
+ SlowFPBrcc = false;
+ InThumbMode = false;
+ UseSoftFloat = false;
+ HasThumb2 = false;
+ NoARM = false;
+ ReserveR9 = false;
+ NoMovt = false;
+ SupportsTailCall = false;
+ HasFP16 = false;
+ HasFullFP16 = false;
+ HasD16 = false;
+ HasHardwareDivide = false;
+ HasHardwareDivideInARM = false;
+ HasT2ExtractPack = false;
+ HasDataBarrier = false;
+ Pref32BitThumb = false;
+ AvoidCPSRPartialUpdate = false;
+ AvoidMOVsShifterOperand = false;
+ HasRAS = false;
+ HasMPExtension = false;
+ HasVirtualization = false;
+ FPOnlySP = false;
+ HasPerfMon = false;
+ HasTrustZone = false;
+ HasCrypto = false;
+ HasCRC = false;
+ HasZeroCycleZeroing = false;
+ StrictAlign = false;
+ HasDSP = false;
+ UseNaClTrap = false;
+ GenLongCalls = false;
+ UnsafeFPMath = false;
+
+ // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
+ // directly from it, but we can try to make sure they're consistent when both
+ // available.
+ UseSjLjEH = isTargetDarwin() && !isTargetWatchOS();
+ assert((!TM.getMCAsmInfo() ||
+ (TM.getMCAsmInfo()->getExceptionHandlingType() ==
+ ExceptionHandling::SjLj) == UseSjLjEH) &&
+ "inconsistent sjlj choice between CodeGen and MC");
+}
+
+void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
+ if (CPUString.empty()) {
+ CPUString = "generic";
+
+ if (isTargetDarwin()) {
+ StringRef ArchName = TargetTriple.getArchName();
+ if (ArchName.endswith("v7s"))
+ // Default to the Swift CPU when targeting armv7s/thumbv7s.
+ CPUString = "swift";
+ else if (ArchName.endswith("v7k"))
+ // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
+ // ARMv7k does not use SjLj exception handling.
+ CPUString = "cortex-a7";
+ }
+ }
+
+ // Insert the architecture feature derived from the target triple into the
+ // feature string. This is important for setting features that are implied
+ // based on the architecture version.
+ std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple, CPUString);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
+ else
+ ArchFS = FS;
+ }
+ ParseSubtargetFeatures(CPUString, ArchFS);
+
+ // FIXME: This used enable V6T2 support implicitly for Thumb2 mode.
+ // Assert this for now to make the change obvious.
+ assert(hasV6T2Ops() || !hasThumb2());
+
+ // Keep a pointer to static instruction cost data for the specified CPU.
+ SchedModel = getSchedModelForCPU(CPUString);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUString);
+
+ // FIXME: this is invalid for WindowsCE
+ if (isTargetWindows())
+ NoARM = true;
+
+ if (isAAPCS_ABI())
+ stackAlignment = 8;
+ if (isTargetNaCl() || isAAPCS16_ABI())
+ stackAlignment = 16;
+
+ // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
+ // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
+ // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
+ // support in the assembler and linker to be used. This would need to be
+ // fixed to fully support tail calls in Thumb1.
+ //
+ // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+ // LR. This means if we need to reload LR, it takes an extra instructions,
+ // which outweighs the value of the tail call; but here we don't know yet
+ // whether LR is going to be used. Probably the right approach is to
+ // generate the tail call here and turn it back into CALL/RET in
+ // emitEpilogue if LR is used.
+
+ // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+ // but we need to make sure there are enough registers; the only valid
+ // registers are the 4 used for parameters. We don't currently do this
+ // case.
+
+ SupportsTailCall = !isThumb1Only();
+
+ if (isTargetMachO() && isTargetIOS() && getTargetTriple().isOSVersionLT(5, 0))
+ SupportsTailCall = false;
+
+ switch (IT) {
+ case DefaultIT:
+ RestrictIT = hasV8Ops();
+ break;
+ case RestrictedIT:
+ RestrictIT = true;
+ break;
+ case NoRestrictedIT:
+ RestrictIT = false;
+ break;
+ }
+
+ // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
+ const FeatureBitset &Bits = getFeatureBits();
+ if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
+ (Options.UnsafeFPMath || isTargetDarwin()))
+ UseNEONForSinglePrecisionFP = true;
+}
+
+bool ARMSubtarget::isAPCS_ABI() const {
+ assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
+ return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_APCS;
+}
+bool ARMSubtarget::isAAPCS_ABI() const {
+ assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
+ return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS ||
+ TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+}
+bool ARMSubtarget::isAAPCS16_ABI() const {
+ assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
+ return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+}
+
+
+/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
+bool
+ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
+ if (RelocM == Reloc::Static)
+ return false;
+
+ bool isDef = GV->isStrongDefinitionForLinker();
+
+ if (!isTargetMachO()) {
+ // Extra load is needed for all externally visible.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+ return true;
+ } else {
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (isDef)
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+
+ if (RelocM == Reloc::PIC_) {
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (GV->isDeclarationForLinker() || GV->hasCommonLinkage())
+ // Hidden $non_lazy_ptr reference.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned ARMSubtarget::getMispredictionPenalty() const {
+ return SchedModel.MispredictPenalty;
+}
+
+bool ARMSubtarget::hasSinCos() const {
+ return isTargetWatchOS() ||
+ (isTargetIOS() && !getTargetTriple().isOSVersionLT(7, 0));
+}
+
+bool ARMSubtarget::enableMachineScheduler() const {
+ // Enable the MachineScheduler before register allocation for out-of-order
+ // architectures where we do not use the PostRA scheduler anymore (for now
+ // restricted to swift).
+ return getSchedModel().isOutOfOrder() && isSwift();
+}
+
+// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
+bool ARMSubtarget::enablePostRAScheduler() const {
+ // No need for PostRA scheduling on out of order CPUs (for now restricted to
+ // swift).
+ if (getSchedModel().isOutOfOrder() && isSwift())
+ return false;
+ return (!isThumb() || hasThumb2());
+}
+
+bool ARMSubtarget::enableAtomicExpand() const {
+ return hasAnyDataBarrier() && !isThumb1Only();
+}
+
+bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+ // For general targets, the prologue can grow when VFPs are allocated with
+ // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
+ // format which it's more important to get right.
+ return isTargetWatchOS() || (isSwift() && !MF.getFunction()->optForMinSize());
+}
+
+bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+ // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
+ // immediates as it is inherently position independent, and may be out of
+ // range otherwise.
+ return !NoMovt && hasV6T2Ops() &&
+ (isTargetWindows() || !MF.getFunction()->optForMinSize());
+}
+
+bool ARMSubtarget::useFastISel() const {
+ // Enable fast-isel for any target, for testing only.
+ if (ForceFastISel)
+ return true;
+
+ // Limit fast-isel to the targets that are or have been tested.
+ if (!hasV6Ops())
+ return false;
+
+ // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
+ return TM.Options.EnableFastISel &&
+ ((isTargetMachO() && !isThumb1Only()) ||
+ (isTargetLinux() && !isThumb()) || (isTargetNaCl() && !isThumb()));
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
new file mode 100644
index 0000000..4d54e57
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -0,0 +1,488 @@
+//===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
+#define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
+
+
+#include "ARMFrameLowering.h"
+#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMSelectionDAGInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "ARMGenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+class TargetOptions;
+class ARMBaseTargetMachine;
+
+class ARMSubtarget : public ARMGenSubtargetInfo {
+protected:
+ enum ARMProcFamilyEnum {
+ Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
+ CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexA35, CortexA53,
+ CortexA57, CortexA72, Krait, Swift, ExynosM1
+ };
+ enum ARMProcClassEnum {
+ None, AClass, RClass, MClass
+ };
+ enum ARMArchEnum {
+ ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te,
+ ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r,
+ ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a
+ };
+
+ /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
+ ARMProcFamilyEnum ARMProcFamily;
+
+ /// ARMProcClass - ARM processor class: None, AClass, RClass or MClass.
+ ARMProcClassEnum ARMProcClass;
+
+ /// ARMArch - ARM architecture
+ ARMArchEnum ARMArch;
+
+ /// HasV4TOps, HasV5TOps, HasV5TEOps,
+ /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
+ /// Specify whether target support specific ARM ISA variants.
+ bool HasV4TOps;
+ bool HasV5TOps;
+ bool HasV5TEOps;
+ bool HasV6Ops;
+ bool HasV6MOps;
+ bool HasV6KOps;
+ bool HasV6T2Ops;
+ bool HasV7Ops;
+ bool HasV8Ops;
+ bool HasV8_1aOps;
+ bool HasV8_2aOps;
+
+ /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
+ /// floating point ISAs are supported.
+ bool HasVFPv2;
+ bool HasVFPv3;
+ bool HasVFPv4;
+ bool HasFPARMv8;
+ bool HasNEON;
+
+ /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
+ /// specified. Use the method useNEONForSinglePrecisionFP() to
+ /// determine if NEON should actually be used.
+ bool UseNEONForSinglePrecisionFP;
+
+ /// UseMulOps - True if non-microcoded fused integer multiply-add and
+ /// multiply-subtract instructions should be used.
+ bool UseMulOps;
+
+ /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
+ /// whether the FP VML[AS] instructions are slow (if so, don't use them).
+ bool SlowFPVMLx;
+
+ /// HasVMLxForwarding - If true, NEON has special multiplier accumulator
+ /// forwarding to allow mul + mla being issued back to back.
+ bool HasVMLxForwarding;
+
+ /// SlowFPBrcc - True if floating point compare + branch is slow.
+ bool SlowFPBrcc;
+
+ /// InThumbMode - True if compiling for Thumb, false for ARM.
+ bool InThumbMode;
+
+ /// UseSoftFloat - True if we're using software floating point features.
+ bool UseSoftFloat;
+
+ /// HasThumb2 - True if Thumb2 instructions are supported.
+ bool HasThumb2;
+
+ /// NoARM - True if subtarget does not support ARM mode execution.
+ bool NoARM;
+
+ /// ReserveR9 - True if R9 is not available as a general purpose register.
+ bool ReserveR9;
+
+ /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of
+ /// 32-bit imms (including global addresses).
+ bool NoMovt;
+
+ /// SupportsTailCall - True if the OS supports tail call. The dynamic linker
+ /// must be able to synthesize call stubs for interworking between ARM and
+ /// Thumb.
+ bool SupportsTailCall;
+
+ /// HasFP16 - True if subtarget supports half-precision FP conversions
+ bool HasFP16;
+
+ /// HasFullFP16 - True if subtarget supports half-precision FP operations
+ bool HasFullFP16;
+
+ /// HasD16 - True if subtarget is limited to 16 double precision
+ /// FP registers for VFPv3.
+ bool HasD16;
+
+ /// HasHardwareDivide - True if subtarget supports [su]div
+ bool HasHardwareDivide;
+
+ /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
+ bool HasHardwareDivideInARM;
+
+ /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
+ /// instructions.
+ bool HasT2ExtractPack;
+
+ /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
+ /// instructions.
+ bool HasDataBarrier;
+
+ /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
+ /// over 16-bit ones.
+ bool Pref32BitThumb;
+
+ /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions
+ /// that partially update CPSR and add false dependency on the previous
+ /// CPSR setting instruction.
+ bool AvoidCPSRPartialUpdate;
+
+ /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting
+ /// movs with shifter operand (i.e. asr, lsl, lsr).
+ bool AvoidMOVsShifterOperand;
+
+ /// HasRAS - Some processors perform return stack prediction. CodeGen should
+ /// avoid issue "normal" call instructions to callees which do not return.
+ bool HasRAS;
+
+ /// HasMPExtension - True if the subtarget supports Multiprocessing
+ /// extension (ARMv7 only).
+ bool HasMPExtension;
+
+ /// HasVirtualization - True if the subtarget supports the Virtualization
+ /// extension.
+ bool HasVirtualization;
+
+ /// FPOnlySP - If true, the floating point unit only supports single
+ /// precision.
+ bool FPOnlySP;
+
+ /// If true, the processor supports the Performance Monitor Extensions. These
+ /// include a generic cycle-counter as well as more fine-grained (often
+ /// implementation-specific) events.
+ bool HasPerfMon;
+
+ /// HasTrustZone - if true, processor supports TrustZone security extensions
+ bool HasTrustZone;
+
+ /// HasCrypto - if true, processor supports Cryptography extensions
+ bool HasCrypto;
+
+ /// HasCRC - if true, processor supports CRC instructions
+ bool HasCRC;
+
+ /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
+ /// particularly effective at zeroing a VFP register.
+ bool HasZeroCycleZeroing;
+
+ /// StrictAlign - If true, the subtarget disallows unaligned memory
+ /// accesses for some types. For details, see
+ /// ARMTargetLowering::allowsMisalignedMemoryAccesses().
+ bool StrictAlign;
+
+ /// RestrictIT - If true, the subtarget disallows generation of deprecated IT
+ /// blocks to conform to ARMv8 rule.
+ bool RestrictIT;
+
+ /// HasDSP - If true, the subtarget supports the DSP (saturating arith
+ /// and such) instructions.
+ bool HasDSP;
+
+ /// NaCl TRAP instruction is generated instead of the regular TRAP.
+ bool UseNaClTrap;
+
+ /// Generate calls via indirect call instructions.
+ bool GenLongCalls;
+
+ /// Target machine allowed unsafe FP math (such as use of NEON fp)
+ bool UnsafeFPMath;
+
+ /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
+ bool UseSjLjEH;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// CPUString - String name of used CPU.
+ std::string CPUString;
+
+ /// IsLittle - The target is Little Endian
+ bool IsLittle;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+ /// SchedModel - Processor specific instruction costs.
+ MCSchedModel SchedModel;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Options passed via command line that could influence the target
+ const TargetOptions &Options;
+
+ const ARMBaseTargetMachine &TM;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
+ const ARMBaseTargetMachine &TM, bool IsLittle);
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const {
+ return 64;
+ }
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// initializeSubtargetDependencies - Initializes using a CPU and feature string
+ /// so that we can use initializer lists for subtarget initialization.
+ ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
+
+ const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const ARMBaseInstrInfo *getInstrInfo() const override {
+ return InstrInfo.get();
+ }
+ const ARMTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const ARMFrameLowering *getFrameLowering() const override {
+ return FrameLowering.get();
+ }
+ const ARMBaseRegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo->getRegisterInfo();
+ }
+
+private:
+ ARMSelectionDAGInfo TSInfo;
+ // Either Thumb1FrameLowering or ARMFrameLowering.
+ std::unique_ptr<ARMFrameLowering> FrameLowering;
+ // Either Thumb1InstrInfo or Thumb2InstrInfo.
+ std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
+ ARMTargetLowering TLInfo;
+
+ void initializeEnvironment();
+ void initSubtargetFeatures(StringRef CPU, StringRef FS);
+ ARMFrameLowering *initializeFrameLowering(StringRef CPU, StringRef FS);
+
+public:
+ void computeIssueWidth();
+
+ bool hasV4TOps() const { return HasV4TOps; }
+ bool hasV5TOps() const { return HasV5TOps; }
+ bool hasV5TEOps() const { return HasV5TEOps; }
+ bool hasV6Ops() const { return HasV6Ops; }
+ bool hasV6MOps() const { return HasV6MOps; }
+ bool hasV6KOps() const { return HasV6KOps; }
+ bool hasV6T2Ops() const { return HasV6T2Ops; }
+ bool hasV7Ops() const { return HasV7Ops; }
+ bool hasV8Ops() const { return HasV8Ops; }
+ bool hasV8_1aOps() const { return HasV8_1aOps; }
+ bool hasV8_2aOps() const { return HasV8_2aOps; }
+
+ bool isCortexA5() const { return ARMProcFamily == CortexA5; }
+ bool isCortexA7() const { return ARMProcFamily == CortexA7; }
+ bool isCortexA8() const { return ARMProcFamily == CortexA8; }
+ bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+ bool isCortexA15() const { return ARMProcFamily == CortexA15; }
+ bool isSwift() const { return ARMProcFamily == Swift; }
+ bool isCortexM3() const { return CPUString == "cortex-m3"; }
+ bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); }
+ bool isCortexR5() const { return ARMProcFamily == CortexR5; }
+ bool isKrait() const { return ARMProcFamily == Krait; }
+
+ bool hasARMOps() const { return !NoARM; }
+
+ bool hasVFP2() const { return HasVFPv2; }
+ bool hasVFP3() const { return HasVFPv3; }
+ bool hasVFP4() const { return HasVFPv4; }
+ bool hasFPARMv8() const { return HasFPARMv8; }
+ bool hasNEON() const { return HasNEON; }
+ bool hasCrypto() const { return HasCrypto; }
+ bool hasCRC() const { return HasCRC; }
+ bool hasVirtualization() const { return HasVirtualization; }
+ bool useNEONForSinglePrecisionFP() const {
+ return hasNEON() && UseNEONForSinglePrecisionFP;
+ }
+
+ bool hasDivide() const { return HasHardwareDivide; }
+ bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
+ bool hasT2ExtractPack() const { return HasT2ExtractPack; }
+ bool hasDataBarrier() const { return HasDataBarrier; }
+ bool hasAnyDataBarrier() const {
+ return HasDataBarrier || (hasV6Ops() && !isThumb());
+ }
+ bool useMulOps() const { return UseMulOps; }
+ bool useFPVMLx() const { return !SlowFPVMLx; }
+ bool hasVMLxForwarding() const { return HasVMLxForwarding; }
+ bool isFPBrccSlow() const { return SlowFPBrcc; }
+ bool isFPOnlySP() const { return FPOnlySP; }
+ bool hasPerfMon() const { return HasPerfMon; }
+ bool hasTrustZone() const { return HasTrustZone; }
+ bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+ bool prefers32BitThumb() const { return Pref32BitThumb; }
+ bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
+ bool hasRAS() const { return HasRAS; }
+ bool hasMPExtension() const { return HasMPExtension; }
+ bool hasDSP() const { return HasDSP; }
+ bool useNaClTrap() const { return UseNaClTrap; }
+ bool useSjLjEH() const { return UseSjLjEH; }
+ bool genLongCalls() const { return GenLongCalls; }
+
+ bool hasFP16() const { return HasFP16; }
+ bool hasD16() const { return HasD16; }
+ bool hasFullFP16() const { return HasFullFP16; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetIOS() const { return TargetTriple.isiOS(); }
+ bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
+ bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+ bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
+ bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
+ bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
+
+ bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+
+ // ARM EABI is the bare-metal EABI described in ARM ABI documents and
+ // can be accessed via -target arm-none-eabi. This is NOT GNUEABI.
+ // FIXME: Add a flag for bare-metal for that target and set Triple::EABI
+ // even for GNUEABI, so we can make a distinction here and still conform to
+ // the EABI on GNU (and Android) mode. This requires change in Clang, too.
+ // FIXME: The Darwin exception is temporary, while we move users to
+ // "*-*-*-macho" triples as quickly as possible.
+ bool isTargetAEABI() const {
+ return (TargetTriple.getEnvironment() == Triple::EABI ||
+ TargetTriple.getEnvironment() == Triple::EABIHF) &&
+ !isTargetDarwin() && !isTargetWindows();
+ }
+ bool isTargetGNUAEABI() const {
+ return (TargetTriple.getEnvironment() == Triple::GNUEABI ||
+ TargetTriple.getEnvironment() == Triple::GNUEABIHF) &&
+ !isTargetDarwin() && !isTargetWindows();
+ }
+
+ // ARM Targets that support EHABI exception handling standard
+ // Darwin uses SjLj. Other targets might need more checks.
+ bool isTargetEHABICompatible() const {
+ return (TargetTriple.getEnvironment() == Triple::EABI ||
+ TargetTriple.getEnvironment() == Triple::GNUEABI ||
+ TargetTriple.getEnvironment() == Triple::EABIHF ||
+ TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+ isTargetAndroid()) &&
+ !isTargetDarwin() && !isTargetWindows();
+ }
+
+ bool isTargetHardFloat() const {
+ // FIXME: this is invalid for WindowsCE
+ return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+ TargetTriple.getEnvironment() == Triple::EABIHF ||
+ isTargetWindows() || isAAPCS16_ABI();
+ }
+ bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
+
+ bool isAPCS_ABI() const;
+ bool isAAPCS_ABI() const;
+ bool isAAPCS16_ABI() const;
+
+ bool useSoftFloat() const { return UseSoftFloat; }
+ bool isThumb() const { return InThumbMode; }
+ bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
+ bool isThumb2() const { return InThumbMode && HasThumb2; }
+ bool hasThumb2() const { return HasThumb2; }
+ bool isMClass() const { return ARMProcClass == MClass; }
+ bool isRClass() const { return ARMProcClass == RClass; }
+ bool isAClass() const { return ARMProcClass == AClass; }
+
+ bool isR9Reserved() const {
+ return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9;
+ }
+
+ bool useStride4VFPs(const MachineFunction &MF) const;
+
+ bool useMovt(const MachineFunction &MF) const;
+
+ bool supportsTailCall() const { return SupportsTailCall; }
+
+ bool allowsUnalignedMem() const { return !StrictAlign; }
+
+ bool restrictIT() const { return RestrictIT; }
+
+ const std::string & getCPUString() const { return CPUString; }
+
+ bool isLittle() const { return IsLittle; }
+
+ unsigned getMispredictionPenalty() const;
+
+ /// This function returns true if the target has sincos() routine in its
+ /// compiler runtime or math libraries.
+ bool hasSinCos() const;
+
+ /// Returns true if machine scheduler should be enabled.
+ bool enableMachineScheduler() const override;
+
+ /// True for some subtargets at > -O0.
+ bool enablePostRAScheduler() const override;
+
+ // enableAtomicExpand- True if we need to expand our atomics.
+ bool enableAtomicExpand() const override;
+
+ /// getInstrItins - Return the instruction itineraries based on subtarget
+ /// selection.
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect
+ /// symbol.
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+
+ /// True if fast-isel is used.
+ bool useFastISel() const;
+};
+} // End llvm namespace
+
+#endif // ARMSUBTARGET_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
new file mode 100644
index 0000000..fca1901
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -0,0 +1,431 @@
+//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMFrameLowering.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "ARMTargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+static cl::opt<bool>
+DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
+ cl::desc("Inhibit optimization of S->D register accesses on A15"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
+ cl::desc("Run SimplifyCFG after expanding atomic operations"
+ " to make use of cmpxchg flow-based information"),
+ cl::init(true));
+
+static cl::opt<bool>
+EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden,
+ cl::desc("Enable ARM load/store optimization pass"),
+ cl::init(true));
+
+// FIXME: Unify control over GlobalMerge.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalMerge("arm-global-merge", cl::Hidden,
+ cl::desc("Enable the global merge pass"));
+
+extern "C" void LLVMInitializeARMTarget() {
+ // Register the target.
+ RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
+ RegisterTargetMachine<ARMBETargetMachine> Y(TheARMBETarget);
+ RegisterTargetMachine<ThumbLETargetMachine> A(TheThumbLETarget);
+ RegisterTargetMachine<ThumbBETargetMachine> B(TheThumbBETarget);
+}
+
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO())
+ return make_unique<TargetLoweringObjectFileMachO>();
+ if (TT.isOSWindows())
+ return make_unique<TargetLoweringObjectFileCOFF>();
+ return make_unique<ARMElfTargetObjectFile>();
+}
+
+static ARMBaseTargetMachine::ARMABI
+computeTargetABI(const Triple &TT, StringRef CPU,
+ const TargetOptions &Options) {
+ if (Options.MCOptions.getABIName() == "aapcs16")
+ return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+ else if (Options.MCOptions.getABIName().startswith("aapcs"))
+ return ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ else if (Options.MCOptions.getABIName().startswith("apcs"))
+ return ARMBaseTargetMachine::ARM_ABI_APCS;
+
+ assert(Options.MCOptions.getABIName().empty() &&
+ "Unknown target-abi option!");
+
+ ARMBaseTargetMachine::ARMABI TargetABI =
+ ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
+
+ // FIXME: This is duplicated code from the front end and should be unified.
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getEnvironment() == llvm::Triple::EABI ||
+ (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
+ CPU.startswith("cortex-m")) {
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ } else if (TT.isWatchOS()) {
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+ } else {
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
+ }
+ } else if (TT.isOSWindows()) {
+ // FIXME: this is invalid for WindowsCE
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ } else {
+ // Select the default based on the platform.
+ switch (TT.getEnvironment()) {
+ case llvm::Triple::Android:
+ case llvm::Triple::GNUEABI:
+ case llvm::Triple::GNUEABIHF:
+ case llvm::Triple::EABIHF:
+ case llvm::Triple::EABI:
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ break;
+ case llvm::Triple::GNU:
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
+ break;
+ default:
+ if (TT.isOSNetBSD())
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
+ else
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ break;
+ }
+ }
+
+ return TargetABI;
+}
+
+static std::string computeDataLayout(const Triple &TT, StringRef CPU,
+ const TargetOptions &Options,
+ bool isLittle) {
+ auto ABI = computeTargetABI(TT, CPU, Options);
+ std::string Ret = "";
+
+ if (isLittle)
+ // Little endian.
+ Ret += "e";
+ else
+ // Big endian.
+ Ret += "E";
+
+ Ret += DataLayout::getManglingComponent(TT);
+
+ // Pointers are 32 bits and aligned to 32 bits.
+ Ret += "-p:32:32";
+
+ // ABIs other than APCS have 64 bit integers with natural alignment.
+ if (ABI != ARMBaseTargetMachine::ARM_ABI_APCS)
+ Ret += "-i64:64";
+
+ // We have 64 bits floats. The APCS ABI requires them to be aligned to 32
+ // bits, others to 64 bits. We always try to align to 64 bits.
+ if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
+ Ret += "-f64:32:64";
+
+ // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
+ // to 64. We always ty to give them natural alignment.
+ if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
+ Ret += "-v64:32:64-v128:32:128";
+ else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16)
+ Ret += "-v128:64:128";
+
+ // Try to align aggregates to 32 bits (the default is 64 bits, which has no
+ // particular hardware support on 32-bit ARM).
+ Ret += "-a:0:32";
+
+ // Integer registers are 32 bits.
+ Ret += "-n32";
+
+ // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
+ // aligned everywhere else.
+ if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
+ Ret += "-S128";
+ else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
+ Ret += "-S64";
+ else
+ Ret += "-S32";
+
+ return Ret;
+}
+
+/// TargetMachine ctor - Create an ARM architecture model.
+///
+ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool isLittle)
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
+ CPU, FS, Options, RM, CM, OL),
+ TargetABI(computeTargetABI(TT, CPU, Options)),
+ TLOF(createTLOF(getTargetTriple())),
+ Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
+
+ // Default to triple-appropriate float ABI
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType =
+ Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft;
+
+ // Default to triple-appropriate EABI
+ if (Options.EABIVersion == EABI::Default ||
+ Options.EABIVersion == EABI::Unknown) {
+ if (Subtarget.isTargetGNUAEABI())
+ this->Options.EABIVersion = EABI::GNU;
+ else
+ this->Options.EABIVersion = EABI::EABI5;
+ }
+}
+
+ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
+
+const ARMSubtarget *
+ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ // FIXME: This is related to the code below to reset the target options,
+ // we need to know whether or not the soft float flag is set on the
+ // function before we can generate a subtarget. We also need to use
+ // it as a key for the subtarget since that can be the only difference
+ // between two functions.
+ bool SoftFloat =
+ F.hasFnAttribute("use-soft-float") &&
+ F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+ // If the soft float attribute is set on the function turn on the soft float
+ // subtarget feature.
+ if (SoftFloat)
+ FS += FS.empty() ? "+soft-float" : ",+soft-float";
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+ }
+ return I.get();
+}
+
+TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(ARMTTIImpl(this, F));
+ });
+}
+
+void ARMTargetMachine::anchor() {}
+
+ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool isLittle)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
+ initAsmInfo();
+ if (!Subtarget.hasARMOps())
+ report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
+ "support ARM mode execution!");
+}
+
+void ARMLETargetMachine::anchor() {}
+
+ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+
+void ARMBETargetMachine::anchor() {}
+
+ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+
+void ThumbTargetMachine::anchor() {}
+
+ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool isLittle)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
+ initAsmInfo();
+}
+
+void ThumbLETargetMachine::anchor() {}
+
+ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+
+void ThumbBETargetMachine::anchor() {}
+
+ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+
+namespace {
+/// ARM Code Generator Pass Configuration Options.
+class ARMPassConfig : public TargetPassConfig {
+public:
+ ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ ARMBaseTargetMachine &getARMTargetMachine() const {
+ return getTM<ARMBaseTargetMachine>();
+ }
+
+ void addIRPasses() override;
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ void addPreRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
+};
+} // namespace
+
+TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new ARMPassConfig(this, PM);
+}
+
+void ARMPassConfig::addIRPasses() {
+ if (TM->Options.ThreadModel == ThreadModel::Single)
+ addPass(createLowerAtomicPass());
+ else
+ addPass(createAtomicExpandPass(TM));
+
+ // Cmpxchg instructions are often used with a subsequent comparison to
+ // determine whether it succeeded. We can exploit existing control-flow in
+ // ldrex/strex loops to simplify this, but it needs tidying up.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+ addPass(createCFGSimplificationPass(-1, [this](const Function &F) {
+ const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
+ return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
+ }));
+
+ TargetPassConfig::addIRPasses();
+
+ // Match interleaved memory accesses to ldN/stN intrinsics.
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createInterleavedAccessPass(TM));
+}
+
+bool ARMPassConfig::addPreISel() {
+ if ((TM->getOptLevel() != CodeGenOpt::None &&
+ EnableGlobalMerge == cl::BOU_UNSET) ||
+ EnableGlobalMerge == cl::BOU_TRUE) {
+ // FIXME: This is using the thumb1 only constant value for
+ // maximal global offset for merging globals. We may want
+ // to look into using the old value for non-thumb1 code of
+ // 4095 based on the TargetMachine, but this starts to become
+ // tricky when doing code gen per function.
+ bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
+ (EnableGlobalMerge == cl::BOU_UNSET);
+ // Merging of extern globals is enabled by default on non-Mach-O as we
+ // expect it to be generally either beneficial or harmless. On Mach-O it
+ // is disabled as we emit the .subsections_via_symbols directive which
+ // means that merging extern globals is not safe.
+ bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
+ addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize,
+ MergeExternalByDefault));
+ }
+
+ return false;
+}
+
+bool ARMPassConfig::addInstSelector() {
+ addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
+ return false;
+}
+
+void ARMPassConfig::addPreRegAlloc() {
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(createMLxExpansionPass());
+
+ if (EnableARMLoadStoreOpt)
+ addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true));
+
+ if (!DisableA15SDOptimization)
+ addPass(createA15SDOptimizerPass());
+ }
+}
+
+void ARMPassConfig::addPreSched2() {
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (EnableARMLoadStoreOpt)
+ addPass(createARMLoadStoreOptimizationPass());
+
+ addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ }
+
+ // Expand some pseudo instructions into multiple instructions to allow
+ // proper scheduling.
+ addPass(createARMExpandPseudoPass());
+
+ if (getOptLevel() != CodeGenOpt::None) {
+ // in v8, IfConversion depends on Thumb instruction widths
+ addPass(createThumb2SizeReductionPass([this](const Function &F) {
+ return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT();
+ }));
+
+ addPass(createIfConverter([this](const Function &F) {
+ return !this->TM->getSubtarget<ARMSubtarget>(F).isThumb1Only();
+ }));
+ }
+ addPass(createThumb2ITBlockPass());
+}
+
+void ARMPassConfig::addPreEmitPass() {
+ addPass(createThumb2SizeReductionPass());
+
+ // Constant island pass work on unbundled instructions.
+ addPass(createUnpackMachineBundles([this](const Function &F) {
+ return this->TM->getSubtarget<ARMSubtarget>(F).isThumb2();
+ }));
+
+ // Don't optimize barriers at -O0.
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createARMOptimizeBarriersPass());
+
+ addPass(createARMConstantIslandPass());
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
new file mode 100644
index 0000000..8ad1f3d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -0,0 +1,130 @@
+//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
+#define LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
+
+#include "ARMInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class ARMBaseTargetMachine : public LLVMTargetMachine {
+public:
+ enum ARMABI {
+ ARM_ABI_UNKNOWN,
+ ARM_ABI_APCS,
+ ARM_ABI_AAPCS, // ARM EABI
+ ARM_ABI_AAPCS16
+ } TargetABI;
+
+protected:
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ ARMSubtarget Subtarget;
+ bool isLittle;
+ mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap;
+
+public:
+ ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool isLittle);
+ ~ARMBaseTargetMachine() override;
+
+ const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const ARMSubtarget *getSubtargetImpl(const Function &F) const override;
+ bool isLittleEndian() const { return isLittle; }
+
+ /// \brief Get the TargetIRAnalysis for this target.
+ TargetIRAnalysis getTargetIRAnalysis() override;
+
+ // Pass Pipeline Configuration
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+};
+
+/// ARMTargetMachine - ARM target machine.
+///
+class ARMTargetMachine : public ARMBaseTargetMachine {
+ virtual void anchor();
+ public:
+ ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
+};
+
+/// ARMLETargetMachine - ARM little endian target machine.
+///
+class ARMLETargetMachine : public ARMTargetMachine {
+ void anchor() override;
+public:
+ ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+/// ARMBETargetMachine - ARM big endian target machine.
+///
+class ARMBETargetMachine : public ARMTargetMachine {
+ void anchor() override;
+public:
+ ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+/// ThumbTargetMachine - Thumb target machine.
+/// Due to the way architectures are handled, this represents both
+/// Thumb-1 and Thumb-2.
+///
+class ThumbTargetMachine : public ARMBaseTargetMachine {
+ virtual void anchor();
+public:
+ ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL,
+ bool isLittle);
+};
+
+/// ThumbLETargetMachine - Thumb little endian target machine.
+///
+class ThumbLETargetMachine : public ThumbTargetMachine {
+ void anchor() override;
+public:
+ ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+/// ThumbBETargetMachine - Thumb big endian target machine.
+///
+class ThumbBETargetMachine : public ThumbTargetMachine {
+ void anchor() override;
+public:
+ ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
new file mode 100644
index 0000000..eaed5cc
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -0,0 +1,61 @@
+//===-- llvm/Target/ARMTargetObjectFile.cpp - ARM Object Info Impl --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetObjectFile.h"
+#include "ARMTargetMachine.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF Target
+//===----------------------------------------------------------------------===//
+
+void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ bool isAAPCS_ABI = static_cast<const ARMTargetMachine &>(TM).TargetABI ==
+ ARMTargetMachine::ARMABI::ARM_ABI_AAPCS;
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(isAAPCS_ABI);
+
+ if (isAAPCS_ABI) {
+ LSDASection = nullptr;
+ }
+
+ AttributesSection =
+ getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0);
+}
+
+const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
+ const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
+ const TargetMachine &TM, MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const {
+ if (TM.getMCAsmInfo()->getExceptionHandlingType() != ExceptionHandling::ARM)
+ return TargetLoweringObjectFileELF::getTTypeGlobalReference(
+ GV, Encoding, Mang, TM, MMI, Streamer);
+
+ assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
+
+ return MCSymbolRefExpr::create(TM.getSymbol(GV, Mang),
+ MCSymbolRefExpr::VK_ARM_TARGET2, getContext());
+}
+
+const MCExpr *ARMElfTargetObjectFile::
+getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
+ return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO,
+ getContext());
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
new file mode 100644
index 0000000..98e8763
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -0,0 +1,43 @@
+//===-- llvm/Target/ARMTargetObjectFile.h - ARM Object Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+class MCContext;
+class TargetMachine;
+
+class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+protected:
+ const MCSection *AttributesSection;
+public:
+ ARMElfTargetObjectFile() :
+ TargetLoweringObjectFileELF(),
+ AttributesSection(nullptr)
+ {}
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
+ const MCExpr *
+ getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding,
+ Mangler &Mang, const TargetMachine &TM,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
+
+ /// \brief Describe a TLS variable address within debug info.
+ const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
new file mode 100644
index 0000000..c152011
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -0,0 +1,495 @@
+//===-- ARMTargetTransformInfo.cpp - ARM specific TTI ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "armtti"
+
+int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned Bits = Ty->getPrimitiveSizeInBits();
+ if (Bits == 0 || Bits > 32)
+ return 4;
+
+ int32_t SImmVal = Imm.getSExtValue();
+ uint32_t ZImmVal = Imm.getZExtValue();
+ if (!ST->isThumb()) {
+ if ((SImmVal >= 0 && SImmVal < 65536) ||
+ (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
+ (ARM_AM::getSOImmVal(~ZImmVal) != -1))
+ return 1;
+ return ST->hasV6T2Ops() ? 2 : 3;
+ }
+ if (ST->isThumb2()) {
+ if ((SImmVal >= 0 && SImmVal < 65536) ||
+ (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
+ (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
+ return 1;
+ return ST->hasV6T2Ops() ? 2 : 3;
+ }
+ // Thumb1.
+ if (SImmVal >= 0 && SImmVal < 256)
+ return 1;
+ if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
+ return 2;
+ // Load from constantpool.
+ return 3;
+}
+
+int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Single to/from double precision conversions.
+ static const CostTblEntry NEONFltDblTbl[] = {
+ // Vector fptrunc/fpext conversions.
+ { ISD::FP_ROUND, MVT::v2f64, 2 },
+ { ISD::FP_EXTEND, MVT::v2f32, 2 },
+ { ISD::FP_EXTEND, MVT::v4f32, 4 }
+ };
+
+ if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
+ ISD == ISD::FP_EXTEND)) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+ if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ EVT SrcTy = TLI->getValueType(DL, Src);
+ EVT DstTy = TLI->getValueType(DL, Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
+
+ // Some arithmetic, load and store operations have specific instructions
+ // to cast up/down their types automatically at no extra cost.
+ // TODO: Get these tables to know at least what the related operations are.
+ static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+
+ // The number of vmovl instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+
+ // Operations that we legalize using splitting.
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
+
+ // Vector float <-> i32 conversions.
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
+
+ // Vector double <-> i32 conversions.
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
+ };
+
+ if (SrcTy.isVector() && ST->hasNEON()) {
+ if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+ }
+
+ // Scalar float to integer conversions.
+ static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
+ { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
+ { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
+ };
+ if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
+ if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+ }
+
+ // Scalar integer to float conversions.
+ static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
+ };
+
+ if (SrcTy.isInteger() && ST->hasNEON()) {
+ if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+ }
+
+ // Scalar integer conversion costs.
+ static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
+ // i16 -> i64 requires two dependent operations.
+ { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
+
+ // Truncates on i64 are assumed to be free.
+ { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
+ };
+
+ if (SrcTy.isInteger()) {
+ if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+ }
+
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
+}
+
+int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) {
+ // Penalize inserting into an D-subregister. We end up with a three times
+ // lower estimated throughput on swift.
+ if (ST->isSwift() &&
+ Opcode == Instruction::InsertElement &&
+ ValTy->isVectorTy() &&
+ ValTy->getScalarSizeInBits() <= 32)
+ return 3;
+
+ if ((Opcode == Instruction::InsertElement ||
+ Opcode == Instruction::ExtractElement)) {
+ // Cross-class copies are expensive on many microarchitectures,
+ // so assume they are expensive by default.
+ if (ValTy->getVectorElementType()->isIntegerTy())
+ return 3;
+
+ // Even if it's not a cross class copy, this likely leads to mixing
+ // of NEON and VFP code and should be therefore penalized.
+ if (ValTy->isVectorTy() &&
+ ValTy->getScalarSizeInBits() <= 32)
+ return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
+ }
+
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+}
+
+int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ // On NEON a a vector select gets lowered to vbsl.
+ if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ // Lowering of some vector selects is currently far from perfect.
+ static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
+ { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
+ };
+
+ EVT SelCondTy = TLI->getValueType(DL, CondTy);
+ EVT SelValTy = TLI->getValueType(DL, ValTy);
+ if (SelCondTy.isSimple() && SelValTy.isSimple()) {
+ if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
+ SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT()))
+ return Entry->Cost;
+ }
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ return LT.first;
+ }
+
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+ // Address computations in vectorized code with non-consecutive addresses will
+ // likely result in more instructions compared to scalar code where the
+ // computation can more often be merged into the index mode. The resulting
+ // extra micro-ops can significantly decrease throughput.
+ unsigned NumVectorInstToHideOverhead = 10;
+
+ if (Ty->isVectorTy() && IsComplex)
+ return NumVectorInstToHideOverhead;
+
+ // In many cases the address computation is not merged into the instruction
+ // addressing mode.
+ return 1;
+}
+
+int ARMTTIImpl::getFPOpCost(Type *Ty) {
+ // Use similar logic that's in ARMISelLowering:
+ // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
+ // to VFP.
+
+ if (ST->hasVFP2() && !ST->isThumb1Only()) {
+ if (Ty->isFloatTy()) {
+ return TargetTransformInfo::TCC_Basic;
+ }
+
+ if (Ty->isDoubleTy()) {
+ return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive :
+ TargetTransformInfo::TCC_Basic;
+ }
+ }
+
+ return TargetTransformInfo::TCC_Expensive;
+}
+
+int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
+ // We only handle costs of reverse and alternate shuffles for now.
+ if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ if (Kind == TTI::SK_Reverse) {
+ static const CostTblEntry NEONShuffleTbl[] = {
+ // Reverse shuffle cost one instruction if we are shuffling within a
+ // double word (vrev) or two if we shuffle a quad word (vrev, vext).
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
+ LT.second))
+ return LT.first * Entry->Cost;
+
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ }
+ if (Kind == TTI::SK_Alternate) {
+ static const CostTblEntry NEONAltShuffleTbl[] = {
+ // Alt shuffle cost table for ARM. Cost is the number of instructions
+ // required to create the shuffled vector.
+
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ }
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+int ARMTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+ TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
+
+ int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+
+ const unsigned FunctionCallDivCost = 20;
+ const unsigned ReciprocalDivCost = 10;
+ static const CostTblEntry CostTbl[] = {
+ // Division.
+ // These costs are somewhat random. Choose a cost of 20 to indicate that
+ // vectorizing devision (added function call) is going to be very expensive.
+ // Double registers types.
+ { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
+ { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
+ { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
+ { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
+ { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
+ // Quad register types.
+ { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
+ // Multiplication.
+ };
+
+ if (ST->hasNEON())
+ if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
+ return LT.first * Entry->Cost;
+
+ int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
+
+ // This is somewhat of a hack. The problem that we are facing is that SROA
+ // creates a sequence of shift, and, or instructions to construct values.
+ // These sequences are recognized by the ISel and have zero-cost. Not so for
+ // the vectorized code. Because we have support for v2i64 but not i64 those
+ // sequences look particularly beneficial to vectorize.
+ // To work around this we increase the cost of v2i64 operations to make them
+ // seem less beneficial.
+ if (LT.second == MVT::v2i64 &&
+ Op2Info == TargetTransformInfo::OK_UniformConstantValue)
+ Cost += 4;
+
+ return Cost;
+}
+
+int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+
+ if (Src->isVectorTy() && Alignment != 16 &&
+ Src->getVectorElementType()->isDoubleTy()) {
+ // Unaligned loads/stores are extremely inefficient.
+ // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
+ return LT.first * 4;
+ }
+ return LT.first;
+}
+
+int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ assert(Factor >= 2 && "Invalid interleave factor");
+ assert(isa<VectorType>(VecTy) && "Expect a vector type");
+
+ // vldN/vstN doesn't support vector types of i64/f64 element.
+ bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
+
+ if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
+ unsigned NumElts = VecTy->getVectorNumElements();
+ Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+
+ // vldN/vstN only support legal vector types of size 64 or 128 in bits.
+ if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
+ return Factor;
+ }
+
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
new file mode 100644
index 0000000..7d8d238
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -0,0 +1,127 @@
+//===-- ARMTargetTransformInfo.h - ARM specific TTI -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file a TargetTransformInfo::Concept conforming object specific to the
+/// ARM target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
+ typedef BasicTTIImplBase<ARMTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const ARMSubtarget *ST;
+ const ARMTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
+
+ const ARMSubtarget *getST() const { return ST; }
+ const ARMTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ ARMTTIImpl(const ARMTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ ARMTTIImpl(ARMTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+
+ bool enableInterleavedAccessVectorization() { return true; }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ using BaseT::getIntImmCost;
+ int getIntImmCost(const APInt &Imm, Type *Ty);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector) {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 16;
+ return 0;
+ }
+
+ if (ST->isThumb1Only())
+ return 8;
+ return 13;
+ }
+
+ unsigned getRegisterBitWidth(bool Vector) {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 128;
+ return 0;
+ }
+
+ return 32;
+ }
+
+ unsigned getMaxInterleaveFactor(unsigned VF) {
+ // These are out of order CPUs:
+ if (ST->isCortexA15() || ST->isSwift())
+ return 2;
+ return 1;
+ }
+
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+
+ int getAddressComputationCost(Type *Val, bool IsComplex);
+
+ int getFPOpCost(Type *Ty);
+
+ int getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
+ ArrayRef<unsigned> Indices, unsigned Alignment,
+ unsigned AddressSpace);
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
new file mode 100644
index 0000000..c69a741
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -0,0 +1,10034 @@
+//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMFeatures.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserUtils.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/ARMEHABI.h"
+#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+class ARMOperand;
+
+enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
+
+class UnwindContext {
+ MCAsmParser &Parser;
+
+ typedef SmallVector<SMLoc, 4> Locs;
+
+ Locs FnStartLocs;
+ Locs CantUnwindLocs;
+ Locs PersonalityLocs;
+ Locs PersonalityIndexLocs;
+ Locs HandlerDataLocs;
+ int FPReg;
+
+public:
+ UnwindContext(MCAsmParser &P) : Parser(P), FPReg(ARM::SP) {}
+
+ bool hasFnStart() const { return !FnStartLocs.empty(); }
+ bool cantUnwind() const { return !CantUnwindLocs.empty(); }
+ bool hasHandlerData() const { return !HandlerDataLocs.empty(); }
+ bool hasPersonality() const {
+ return !(PersonalityLocs.empty() && PersonalityIndexLocs.empty());
+ }
+
+ void recordFnStart(SMLoc L) { FnStartLocs.push_back(L); }
+ void recordCantUnwind(SMLoc L) { CantUnwindLocs.push_back(L); }
+ void recordPersonality(SMLoc L) { PersonalityLocs.push_back(L); }
+ void recordHandlerData(SMLoc L) { HandlerDataLocs.push_back(L); }
+ void recordPersonalityIndex(SMLoc L) { PersonalityIndexLocs.push_back(L); }
+
+ void saveFPReg(int Reg) { FPReg = Reg; }
+ int getFPReg() const { return FPReg; }
+
+ void emitFnStartLocNotes() const {
+ for (Locs::const_iterator FI = FnStartLocs.begin(), FE = FnStartLocs.end();
+ FI != FE; ++FI)
+ Parser.Note(*FI, ".fnstart was specified here");
+ }
+ void emitCantUnwindLocNotes() const {
+ for (Locs::const_iterator UI = CantUnwindLocs.begin(),
+ UE = CantUnwindLocs.end(); UI != UE; ++UI)
+ Parser.Note(*UI, ".cantunwind was specified here");
+ }
+ void emitHandlerDataLocNotes() const {
+ for (Locs::const_iterator HI = HandlerDataLocs.begin(),
+ HE = HandlerDataLocs.end(); HI != HE; ++HI)
+ Parser.Note(*HI, ".handlerdata was specified here");
+ }
+ void emitPersonalityLocNotes() const {
+ for (Locs::const_iterator PI = PersonalityLocs.begin(),
+ PE = PersonalityLocs.end(),
+ PII = PersonalityIndexLocs.begin(),
+ PIE = PersonalityIndexLocs.end();
+ PI != PE || PII != PIE;) {
+ if (PI != PE && (PII == PIE || PI->getPointer() < PII->getPointer()))
+ Parser.Note(*PI++, ".personality was specified here");
+ else if (PII != PIE && (PI == PE || PII->getPointer() < PI->getPointer()))
+ Parser.Note(*PII++, ".personalityindex was specified here");
+ else
+ llvm_unreachable(".personality and .personalityindex cannot be "
+ "at the same location");
+ }
+ }
+
+ void reset() {
+ FnStartLocs = Locs();
+ CantUnwindLocs = Locs();
+ PersonalityLocs = Locs();
+ HandlerDataLocs = Locs();
+ PersonalityIndexLocs = Locs();
+ FPReg = ARM::SP;
+ }
+};
+
+class ARMAsmParser : public MCTargetAsmParser {
+ const MCInstrInfo &MII;
+ const MCRegisterInfo *MRI;
+ UnwindContext UC;
+
+ ARMTargetStreamer &getTargetStreamer() {
+ assert(getParser().getStreamer().getTargetStreamer() &&
+ "do not have a target streamer");
+ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+ return static_cast<ARMTargetStreamer &>(TS);
+ }
+
+ // Map of register aliases registers via the .req directive.
+ StringMap<unsigned> RegisterReqs;
+
+ bool NextSymbolIsThumb;
+
+ struct {
+ ARMCC::CondCodes Cond; // Condition for IT block.
+ unsigned Mask:4; // Condition mask for instructions.
+ // Starting at first 1 (from lsb).
+ // '1' condition as indicated in IT.
+ // '0' inverse of condition (else).
+ // Count of instructions in IT block is
+ // 4 - trailingzeroes(mask)
+
+ bool FirstCond; // Explicit flag for when we're parsing the
+ // First instruction in the IT block. It's
+ // implied in the mask, so needs special
+ // handling.
+
+ unsigned CurPosition; // Current position in parsing of IT
+ // block. In range [0,3]. Initialized
+ // according to count of instructions in block.
+ // ~0U if no active IT block.
+ } ITState;
+ bool inITBlock() { return ITState.CurPosition != ~0U; }
+ bool lastInITBlock() {
+ return ITState.CurPosition == 4 - countTrailingZeros(ITState.Mask);
+ }
+ void forwardITPosition() {
+ if (!inITBlock()) return;
+ // Move to the next instruction in the IT block, if there is one. If not,
+ // mark the block as done.
+ unsigned TZ = countTrailingZeros(ITState.Mask);
+ if (++ITState.CurPosition == 5 - TZ)
+ ITState.CurPosition = ~0U; // Done with the IT block after this.
+ }
+
+ void Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) {
+ return getParser().Note(L, Msg, Ranges);
+ }
+ bool Warning(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = None) {
+ return getParser().Warning(L, Msg, Ranges);
+ }
+ bool Error(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = None) {
+ return getParser().Error(L, Msg, Ranges);
+ }
+
+ bool validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands,
+ unsigned ListNo, bool IsARPop = false);
+ bool validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands,
+ unsigned ListNo);
+
+ int tryParseRegister();
+ bool tryParseRegisterWithWriteBack(OperandVector &);
+ int tryParseShiftRegister(OperandVector &);
+ bool parseRegisterList(OperandVector &);
+ bool parseMemory(OperandVector &);
+ bool parseOperand(OperandVector &, StringRef Mnemonic);
+ bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
+ bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType,
+ unsigned &ShiftAmount);
+ bool parseLiteralValues(unsigned Size, SMLoc L);
+ bool parseDirectiveThumb(SMLoc L);
+ bool parseDirectiveARM(SMLoc L);
+ bool parseDirectiveThumbFunc(SMLoc L);
+ bool parseDirectiveCode(SMLoc L);
+ bool parseDirectiveSyntax(SMLoc L);
+ bool parseDirectiveReq(StringRef Name, SMLoc L);
+ bool parseDirectiveUnreq(SMLoc L);
+ bool parseDirectiveArch(SMLoc L);
+ bool parseDirectiveEabiAttr(SMLoc L);
+ bool parseDirectiveCPU(SMLoc L);
+ bool parseDirectiveFPU(SMLoc L);
+ bool parseDirectiveFnStart(SMLoc L);
+ bool parseDirectiveFnEnd(SMLoc L);
+ bool parseDirectiveCantUnwind(SMLoc L);
+ bool parseDirectivePersonality(SMLoc L);
+ bool parseDirectiveHandlerData(SMLoc L);
+ bool parseDirectiveSetFP(SMLoc L);
+ bool parseDirectivePad(SMLoc L);
+ bool parseDirectiveRegSave(SMLoc L, bool IsVector);
+ bool parseDirectiveInst(SMLoc L, char Suffix = '\0');
+ bool parseDirectiveLtorg(SMLoc L);
+ bool parseDirectiveEven(SMLoc L);
+ bool parseDirectivePersonalityIndex(SMLoc L);
+ bool parseDirectiveUnwindRaw(SMLoc L);
+ bool parseDirectiveTLSDescSeq(SMLoc L);
+ bool parseDirectiveMovSP(SMLoc L);
+ bool parseDirectiveObjectArch(SMLoc L);
+ bool parseDirectiveArchExtension(SMLoc L);
+ bool parseDirectiveAlign(SMLoc L);
+ bool parseDirectiveThumbSet(SMLoc L);
+
+ StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
+ bool &CarrySetting, unsigned &ProcessorIMod,
+ StringRef &ITMask);
+ void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+ bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode);
+
+ void tryConvertingToTwoOperandForm(StringRef Mnemonic, bool CarrySetting,
+ OperandVector &Operands);
+ bool isThumb() const {
+ // FIXME: Can tablegen auto-generate this?
+ return getSTI().getFeatureBits()[ARM::ModeThumb];
+ }
+ bool isThumbOne() const {
+ return isThumb() && !getSTI().getFeatureBits()[ARM::FeatureThumb2];
+ }
+ bool isThumbTwo() const {
+ return isThumb() && getSTI().getFeatureBits()[ARM::FeatureThumb2];
+ }
+ bool hasThumb() const {
+ return getSTI().getFeatureBits()[ARM::HasV4TOps];
+ }
+ bool hasV6Ops() const {
+ return getSTI().getFeatureBits()[ARM::HasV6Ops];
+ }
+ bool hasV6MOps() const {
+ return getSTI().getFeatureBits()[ARM::HasV6MOps];
+ }
+ bool hasV7Ops() const {
+ return getSTI().getFeatureBits()[ARM::HasV7Ops];
+ }
+ bool hasV8Ops() const {
+ return getSTI().getFeatureBits()[ARM::HasV8Ops];
+ }
+ bool hasARM() const {
+ return !getSTI().getFeatureBits()[ARM::FeatureNoARM];
+ }
+ bool hasDSP() const {
+ return getSTI().getFeatureBits()[ARM::FeatureDSP];
+ }
+ bool hasD16() const {
+ return getSTI().getFeatureBits()[ARM::FeatureD16];
+ }
+ bool hasV8_1aOps() const {
+ return getSTI().getFeatureBits()[ARM::HasV8_1aOps];
+ }
+
+ void SwitchMode() {
+ MCSubtargetInfo &STI = copySTI();
+ uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
+ setAvailableFeatures(FB);
+ }
+ bool isMClass() const {
+ return getSTI().getFeatureBits()[ARM::FeatureMClass];
+ }
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "ARMGenAsmMatcher.inc"
+
+ /// }
+
+ OperandMatchResultTy parseITCondCode(OperandVector &);
+ OperandMatchResultTy parseCoprocNumOperand(OperandVector &);
+ OperandMatchResultTy parseCoprocRegOperand(OperandVector &);
+ OperandMatchResultTy parseCoprocOptionOperand(OperandVector &);
+ OperandMatchResultTy parseMemBarrierOptOperand(OperandVector &);
+ OperandMatchResultTy parseInstSyncBarrierOptOperand(OperandVector &);
+ OperandMatchResultTy parseProcIFlagsOperand(OperandVector &);
+ OperandMatchResultTy parseMSRMaskOperand(OperandVector &);
+ OperandMatchResultTy parseBankedRegOperand(OperandVector &);
+ OperandMatchResultTy parsePKHImm(OperandVector &O, StringRef Op, int Low,
+ int High);
+ OperandMatchResultTy parsePKHLSLImm(OperandVector &O) {
+ return parsePKHImm(O, "lsl", 0, 31);
+ }
+ OperandMatchResultTy parsePKHASRImm(OperandVector &O) {
+ return parsePKHImm(O, "asr", 1, 32);
+ }
+ OperandMatchResultTy parseSetEndImm(OperandVector &);
+ OperandMatchResultTy parseShifterImm(OperandVector &);
+ OperandMatchResultTy parseRotImm(OperandVector &);
+ OperandMatchResultTy parseModImm(OperandVector &);
+ OperandMatchResultTy parseBitfield(OperandVector &);
+ OperandMatchResultTy parsePostIdxReg(OperandVector &);
+ OperandMatchResultTy parseAM3Offset(OperandVector &);
+ OperandMatchResultTy parseFPImm(OperandVector &);
+ OperandMatchResultTy parseVectorList(OperandVector &);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index,
+ SMLoc &EndLoc);
+
+ // Asm Match Converter Methods
+ void cvtThumbMultiply(MCInst &Inst, const OperandVector &);
+ void cvtThumbBranches(MCInst &Inst, const OperandVector &);
+
+ bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
+ bool processInstruction(MCInst &Inst, const OperandVector &Ops, MCStreamer &Out);
+ bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands);
+ bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands);
+
+public:
+ enum ARMMatchResultTy {
+ Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
+ Match_RequiresNotITBlock,
+ Match_RequiresV6,
+ Match_RequiresThumb2,
+ Match_RequiresV8,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "ARMGenAsmMatcher.inc"
+
+ };
+
+ ARMAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI), MII(MII), UC(Parser) {
+ MCAsmParserExtension::Initialize(Parser);
+
+ // Cache the MCRegisterInfo.
+ MRI = getContext().getRegisterInfo();
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ // Not in an ITBlock to start with.
+ ITState.CurPosition = ~0U;
+
+ NextSymbolIsThumb = false;
+ }
+
+ // Implementation of the MCTargetAsmParser interface:
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+ bool ParseDirective(AsmToken DirectiveID) override;
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
+ unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+ void onLabelParsed(MCSymbol *Symbol) override;
+};
+} // end anonymous namespace
+
+namespace {
+
+/// ARMOperand - Instances of this class represent a parsed ARM machine
+/// operand.
+class ARMOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ k_CondCode,
+ k_CCOut,
+ k_ITCondMask,
+ k_CoprocNum,
+ k_CoprocReg,
+ k_CoprocOption,
+ k_Immediate,
+ k_MemBarrierOpt,
+ k_InstSyncBarrierOpt,
+ k_Memory,
+ k_PostIndexRegister,
+ k_MSRMask,
+ k_BankedReg,
+ k_ProcIFlags,
+ k_VectorIndex,
+ k_Register,
+ k_RegisterList,
+ k_DPRRegisterList,
+ k_SPRRegisterList,
+ k_VectorList,
+ k_VectorListAllLanes,
+ k_VectorListIndexed,
+ k_ShiftedRegister,
+ k_ShiftedImmediate,
+ k_ShifterImmediate,
+ k_RotateImmediate,
+ k_ModifiedImmediate,
+ k_BitfieldDescriptor,
+ k_Token
+ } Kind;
+
+ SMLoc StartLoc, EndLoc, AlignmentLoc;
+ SmallVector<unsigned, 8> Registers;
+
+ struct CCOp {
+ ARMCC::CondCodes Val;
+ };
+
+ struct CopOp {
+ unsigned Val;
+ };
+
+ struct CoprocOptionOp {
+ unsigned Val;
+ };
+
+ struct ITMaskOp {
+ unsigned Mask:4;
+ };
+
+ struct MBOptOp {
+ ARM_MB::MemBOpt Val;
+ };
+
+ struct ISBOptOp {
+ ARM_ISB::InstSyncBOpt Val;
+ };
+
+ struct IFlagsOp {
+ ARM_PROC::IFlags Val;
+ };
+
+ struct MMaskOp {
+ unsigned Val;
+ };
+
+ struct BankedRegOp {
+ unsigned Val;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct VectorListOp {
+ unsigned RegNum;
+ unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
+ };
+
+ struct VectorIndexOp {
+ unsigned Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ /// Combined record for all forms of ARM address expressions.
+ struct MemoryOp {
+ unsigned BaseRegNum;
+ // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
+ // was specified.
+ const MCConstantExpr *OffsetImm; // Offset immediate value
+ unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
+ ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
+ unsigned ShiftImm; // shift for OffsetReg.
+ unsigned Alignment; // 0 = no alignment specified
+ // n = alignment in bytes (2, 4, 8, 16, or 32)
+ unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
+ };
+
+ struct PostIdxRegOp {
+ unsigned RegNum;
+ bool isAdd;
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned ShiftImm;
+ };
+
+ struct ShifterImmOp {
+ bool isASR;
+ unsigned Imm;
+ };
+
+ struct RegShiftedRegOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftReg;
+ unsigned ShiftImm;
+ };
+
+ struct RegShiftedImmOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftImm;
+ };
+
+ struct RotImmOp {
+ unsigned Imm;
+ };
+
+ struct ModImmOp {
+ unsigned Bits;
+ unsigned Rot;
+ };
+
+ struct BitfieldOp {
+ unsigned LSB;
+ unsigned Width;
+ };
+
+ union {
+ struct CCOp CC;
+ struct CopOp Cop;
+ struct CoprocOptionOp CoprocOption;
+ struct MBOptOp MBOpt;
+ struct ISBOptOp ISBOpt;
+ struct ITMaskOp ITMask;
+ struct IFlagsOp IFlags;
+ struct MMaskOp MMask;
+ struct BankedRegOp BankedReg;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct VectorListOp VectorList;
+ struct VectorIndexOp VectorIndex;
+ struct ImmOp Imm;
+ struct MemoryOp Memory;
+ struct PostIdxRegOp PostIdxReg;
+ struct ShifterImmOp ShifterImm;
+ struct RegShiftedRegOp RegShiftedReg;
+ struct RegShiftedImmOp RegShiftedImm;
+ struct RotImmOp RotImm;
+ struct ModImmOp ModImm;
+ struct BitfieldOp Bitfield;
+ };
+
+public:
+ ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const override { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const override { return EndLoc; }
+ /// getLocRange - Get the range between the first and last token of this
+ /// operand.
+ SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
+
+ /// getAlignmentLoc - Get the location of the Alignment token of this operand.
+ SMLoc getAlignmentLoc() const {
+ assert(Kind == k_Memory && "Invalid access!");
+ return AlignmentLoc;
+ }
+
+ ARMCC::CondCodes getCondCode() const {
+ assert(Kind == k_CondCode && "Invalid access!");
+ return CC.Val;
+ }
+
+ unsigned getCoproc() const {
+ assert((Kind == k_CoprocNum || Kind == k_CoprocReg) && "Invalid access!");
+ return Cop.Val;
+ }
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const override {
+ assert((Kind == k_Register || Kind == k_CCOut) && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const SmallVectorImpl<unsigned> &getRegList() const {
+ assert((Kind == k_RegisterList || Kind == k_DPRRegisterList ||
+ Kind == k_SPRRegisterList) && "Invalid access!");
+ return Registers;
+ }
+
+ const MCExpr *getImm() const {
+ assert(isImm() && "Invalid access!");
+ return Imm.Val;
+ }
+
+ unsigned getVectorIndex() const {
+ assert(Kind == k_VectorIndex && "Invalid access!");
+ return VectorIndex.Val;
+ }
+
+ ARM_MB::MemBOpt getMemBarrierOpt() const {
+ assert(Kind == k_MemBarrierOpt && "Invalid access!");
+ return MBOpt.Val;
+ }
+
+ ARM_ISB::InstSyncBOpt getInstSyncBarrierOpt() const {
+ assert(Kind == k_InstSyncBarrierOpt && "Invalid access!");
+ return ISBOpt.Val;
+ }
+
+ ARM_PROC::IFlags getProcIFlags() const {
+ assert(Kind == k_ProcIFlags && "Invalid access!");
+ return IFlags.Val;
+ }
+
+ unsigned getMSRMask() const {
+ assert(Kind == k_MSRMask && "Invalid access!");
+ return MMask.Val;
+ }
+
+ unsigned getBankedReg() const {
+ assert(Kind == k_BankedReg && "Invalid access!");
+ return BankedReg.Val;
+ }
+
+ bool isCoprocNum() const { return Kind == k_CoprocNum; }
+ bool isCoprocReg() const { return Kind == k_CoprocReg; }
+ bool isCoprocOption() const { return Kind == k_CoprocOption; }
+ bool isCondCode() const { return Kind == k_CondCode; }
+ bool isCCOut() const { return Kind == k_CCOut; }
+ bool isITMask() const { return Kind == k_ITCondMask; }
+ bool isITCondCode() const { return Kind == k_CondCode; }
+ bool isImm() const override { return Kind == k_Immediate; }
+ // checks whether this operand is an unsigned offset which fits is a field
+ // of specified width and scaled by a specific number of bits
+ template<unsigned width, unsigned scale>
+ bool isUnsignedOffset() const {
+ if (!isImm()) return false;
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Align = 1LL << scale;
+ int64_t Max = Align * ((1LL << width) - 1);
+ return ((Val % Align) == 0) && (Val >= 0) && (Val <= Max);
+ }
+ return false;
+ }
+ // checks whether this operand is an signed offset which fits is a field
+ // of specified width and scaled by a specific number of bits
+ template<unsigned width, unsigned scale>
+ bool isSignedOffset() const {
+ if (!isImm()) return false;
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Align = 1LL << scale;
+ int64_t Max = Align * ((1LL << (width-1)) - 1);
+ int64_t Min = -Align * (1LL << (width-1));
+ return ((Val % Align) == 0) && (Val >= Min) && (Val <= Max);
+ }
+ return false;
+ }
+
+ // checks whether this operand is a memory operand computed as an offset
+ // applied to PC. the offset may have 8 bits of magnitude and is represented
+ // with two bits of shift. textually it may be either [pc, #imm], #imm or
+ // relocable expression...
+ bool isThumbMemPC() const {
+ int64_t Val = 0;
+ if (isImm()) {
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+ if (!CE) return false;
+ Val = CE->getValue();
+ }
+ else if (isMem()) {
+ if(!Memory.OffsetImm || Memory.OffsetRegNum) return false;
+ if(Memory.BaseRegNum != ARM::PC) return false;
+ Val = Memory.OffsetImm->getValue();
+ }
+ else return false;
+ return ((Val % 4) == 0) && (Val >= 0) && (Val <= 1020);
+ }
+ bool isFPImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
+ return Val != -1;
+ }
+ bool isFBits16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value <= 16;
+ }
+ bool isFBits32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 1 && Value <= 32;
+ }
+ bool isImm8s4() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
+ }
+ bool isImm0_1020s4() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= 0 && Value <= 1020;
+ }
+ bool isImm0_508s4() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= 0 && Value <= 508;
+ }
+ bool isImm0_508s4Neg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = -CE->getValue();
+ // explicitly exclude zero. we want that to use the normal 0_508 version.
+ return ((Value & 3) == 0) && Value > 0 && Value <= 508;
+ }
+ bool isImm0_239() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 240;
+ }
+ bool isImm0_255() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 256;
+ }
+ bool isImm0_4095() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4096;
+ }
+ bool isImm0_4095Neg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = -CE->getValue();
+ return Value > 0 && Value < 4096;
+ }
+ bool isImm0_1() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 2;
+ }
+ bool isImm0_3() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4;
+ }
+ bool isImm0_7() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 8;
+ }
+ bool isImm0_15() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 16;
+ }
+ bool isImm0_31() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 32;
+ }
+ bool isImm0_63() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 64;
+ }
+ bool isImm8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 8;
+ }
+ bool isImm16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 16;
+ }
+ bool isImm32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 32;
+ }
+ bool isShrImm8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 8;
+ }
+ bool isShrImm16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 16;
+ }
+ bool isShrImm32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isShrImm64() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 64;
+ }
+ bool isImm1_7() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 8;
+ }
+ bool isImm1_15() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 16;
+ }
+ bool isImm1_31() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 32;
+ }
+ bool isImm1_16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 17;
+ }
+ bool isImm1_32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 33;
+ }
+ bool isImm0_32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 33;
+ }
+ bool isImm0_65535() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 65536;
+ }
+ bool isImm256_65535Expr() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // If it's not a constant expression, it'll generate a fixup and be
+ // handled later.
+ if (!CE) return true;
+ int64_t Value = CE->getValue();
+ return Value >= 256 && Value < 65536;
+ }
+ bool isImm0_65535Expr() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // If it's not a constant expression, it'll generate a fixup and be
+ // handled later.
+ if (!CE) return true;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 65536;
+ }
+ bool isImm24bit() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value <= 0xffffff;
+ }
+ bool isImmThumbSR() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 33;
+ }
+ bool isPKHLSLImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 32;
+ }
+ bool isPKHASRImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isAdrLabel() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+
+ // If it is a constant, it must fit into a modified immediate encoding.
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return (ARM_AM::getSOImmVal(Value) != -1 ||
+ ARM_AM::getSOImmVal(-Value) != -1);
+ }
+ bool isT2SOImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(Value) != -1;
+ }
+ bool isT2SOImmNot() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(Value) == -1 &&
+ ARM_AM::getT2SOImmVal(~Value) != -1;
+ }
+ bool isT2SOImmNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // Only use this when not representable as a plain so_imm.
+ return ARM_AM::getT2SOImmVal(Value) == -1 &&
+ ARM_AM::getT2SOImmVal(-Value) != -1;
+ }
+ bool isSetEndImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 1 || Value == 0;
+ }
+ bool isReg() const override { return Kind == k_Register; }
+ bool isRegList() const { return Kind == k_RegisterList; }
+ bool isDPRRegList() const { return Kind == k_DPRRegisterList; }
+ bool isSPRRegList() const { return Kind == k_SPRRegisterList; }
+ bool isToken() const override { return Kind == k_Token; }
+ bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; }
+ bool isInstSyncBarrierOpt() const { return Kind == k_InstSyncBarrierOpt; }
+ bool isMem() const override { return Kind == k_Memory; }
+ bool isShifterImm() const { return Kind == k_ShifterImmediate; }
+ bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; }
+ bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; }
+ bool isRotImm() const { return Kind == k_RotateImmediate; }
+ bool isModImm() const { return Kind == k_ModifiedImmediate; }
+ bool isModImmNot() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(~Value) != -1;
+ }
+ bool isModImmNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(Value) == -1 &&
+ ARM_AM::getSOImmVal(-Value) != -1;
+ }
+ bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
+ bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
+ bool isPostIdxReg() const {
+ return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift;
+ }
+ bool isMemNoOffset(bool alignOK = false, unsigned Alignment = 0) const {
+ if (!isMem())
+ return false;
+ // No offset of any kind.
+ return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr &&
+ (alignOK || Memory.Alignment == Alignment);
+ }
+ bool isMemPCRelImm12() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Base register must be PC.
+ if (Memory.BaseRegNum != ARM::PC)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
+ }
+ bool isAlignedMemory() const {
+ return isMemNoOffset(true);
+ }
+ bool isAlignedMemoryNone() const {
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemoryNone() const {
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory16() const {
+ if (isMemNoOffset(false, 2)) // alignment in bytes for 16-bits is 2.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemory16() const {
+ if (isMemNoOffset(false, 2)) // alignment in bytes for 16-bits is 2.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory32() const {
+ if (isMemNoOffset(false, 4)) // alignment in bytes for 32-bits is 4.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemory32() const {
+ if (isMemNoOffset(false, 4)) // alignment in bytes for 32-bits is 4.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory64() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemory64() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory64or128() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8.
+ return true;
+ if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemory64or128() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8.
+ return true;
+ if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory64or128or256() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8.
+ return true;
+ if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16.
+ return true;
+ if (isMemNoOffset(false, 32)) // alignment in bytes for 256-bits is 32.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAddrMode2() const {
+ if (!isMem() || Memory.Alignment != 0) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return true;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val > -4096 && Val < 4096;
+ }
+ bool isAM2OffsetImm() const {
+ if (!isImm()) return false;
+ // Immediate offset in range [-4095, 4095].
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return (Val == INT32_MIN) || (Val > -4096 && Val < 4096);
+ }
+ bool isAddrMode3() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+ if (!isMem() || Memory.Alignment != 0) return false;
+ // No shifts are legal for AM3.
+ if (Memory.ShiftType != ARM_AM::no_shift) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return true;
+ // Immediate offset in range [-255, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ // The #-0 offset is encoded as INT32_MIN, and we have to check
+ // for this too.
+ return (Val > -256 && Val < 256) || Val == INT32_MIN;
+ }
+ bool isAM3Offset() const {
+ if (Kind != k_Immediate && Kind != k_PostIndexRegister)
+ return false;
+ if (Kind == k_PostIndexRegister)
+ return PostIdxReg.ShiftTy == ARM_AM::no_shift;
+ // Immediate offset in range [-255, 255].
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ // Special case, #-0 is INT32_MIN.
+ return (Val > -256 && Val < 256) || Val == INT32_MIN;
+ }
+ bool isAddrMode5() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+ if (!isMem() || Memory.Alignment != 0) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return false;
+ // Immediate offset in range [-1020, 1020] and a multiple of 4.
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) ||
+ Val == INT32_MIN;
+ }
+ bool isMemTBB() const {
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
+ return false;
+ return true;
+ }
+ bool isMemTBH() const {
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 ||
+ Memory.Alignment != 0 )
+ return false;
+ return true;
+ }
+ bool isMemRegOffset() const {
+ if (!isMem() || !Memory.OffsetRegNum || Memory.Alignment != 0)
+ return false;
+ return true;
+ }
+ bool isT2MemRegOffset() const {
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.Alignment != 0)
+ return false;
+ // Only lsl #{0, 1, 2, 3} allowed.
+ if (Memory.ShiftType == ARM_AM::no_shift)
+ return true;
+ if (Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm > 3)
+ return false;
+ return true;
+ }
+ bool isMemThumbRR() const {
+ // Thumb reg+reg addressing is simple. Just two registers, a base and
+ // an offset. No shifts, negations or any other complicating factors.
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
+ return false;
+ return isARMLowRegister(Memory.BaseRegNum) &&
+ (!Memory.OffsetRegNum || isARMLowRegister(Memory.OffsetRegNum));
+ }
+ bool isMemThumbRIs4() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 124].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 124 && (Val % 4) == 0;
+ }
+ bool isMemThumbRIs2() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 62].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 62 && (Val % 2) == 0;
+ }
+ bool isMemThumbRIs1() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 31].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 31;
+ }
+ bool isMemThumbSPI() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
+ Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 1020 && (Val % 4) == 0;
+ }
+ bool isMemImm8s4Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset a multiple of 4 in range [-1020, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ // Special case, #-0 is INT32_MIN.
+ return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN;
+ }
+ bool isMemImm0_1020s4Offset() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset a multiple of 4 in range [0, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 1020 && (Val & 3) == 0;
+ }
+ bool isMemImm8Offset() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Base reg of PC isn't allowed for these encodings.
+ if (Memory.BaseRegNum == ARM::PC) return false;
+ // Immediate offset in range [-255, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val == INT32_MIN) || (Val > -256 && Val < 256);
+ }
+ bool isMemPosImm8Offset() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val < 256;
+ }
+ bool isMemNegImm8Offset() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Base reg of PC isn't allowed for these encodings.
+ if (Memory.BaseRegNum == ARM::PC) return false;
+ // Immediate offset in range [-255, -1].
+ if (!Memory.OffsetImm) return false;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val == INT32_MIN) || (Val > -256 && Val < 0);
+ }
+ bool isMemUImm12Offset() const {
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val >= 0 && Val < 4096);
+ }
+ bool isMemImm12Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
+ }
+ bool isPostIdxImm8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return (Val > -256 && Val < 256) || (Val == INT32_MIN);
+ }
+ bool isPostIdxImm8s4() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return ((Val & 3) == 0 && Val >= -1020 && Val <= 1020) ||
+ (Val == INT32_MIN);
+ }
+
+ bool isMSRMask() const { return Kind == k_MSRMask; }
+ bool isBankedReg() const { return Kind == k_BankedReg; }
+ bool isProcIFlags() const { return Kind == k_ProcIFlags; }
+
+ // NEON operands.
+ bool isSingleSpacedVectorList() const {
+ return Kind == k_VectorList && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorList() const {
+ return Kind == k_VectorList && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListDPair() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListThreeD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isVecListDPairSpaced() const {
+ if (Kind != k_VectorList) return false;
+ if (isSingleSpacedVectorList()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListThreeQ() const {
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourQ() const {
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isSingleSpacedVectorAllLanes() const {
+ return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorAllLanes() const {
+ return Kind == k_VectorListAllLanes && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListDPairAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListDPairSpacedAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 2;
+ }
+
+ bool isVecListThreeDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListThreeQAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isVecListFourQAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isSingleSpacedVectorIndexed() const {
+ return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorIndexed() const {
+ return Kind == k_VectorListIndexed && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListOneDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListOneDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListTwoDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListThreeDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListThreeDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListThreeQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListThreeQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListThreeDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListFourDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListFourDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListFourQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListFourQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListFourDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVectorIndex8() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 8;
+ }
+ bool isVectorIndex16() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 4;
+ }
+ bool isVectorIndex32() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 2;
+ }
+
+ bool isNEONi8splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i8 value splatted across 8 bytes. The immediate is just the 8 byte
+ // value.
+ return Value >= 0 && Value < 256;
+ }
+
+ bool isNEONi16splat() const {
+ if (isNEONByteReplicate(2))
+ return false; // Leave that for bytes replication and forbid by default.
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ unsigned Value = CE->getValue();
+ return ARM_AM::isNEONi16splat(Value);
+ }
+
+ bool isNEONi16splatNot() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ unsigned Value = CE->getValue();
+ return ARM_AM::isNEONi16splat(~Value & 0xffff);
+ }
+
+ bool isNEONi32splat() const {
+ if (isNEONByteReplicate(4))
+ return false; // Leave that for bytes replication and forbid by default.
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ unsigned Value = CE->getValue();
+ return ARM_AM::isNEONi32splat(Value);
+ }
+
+ bool isNEONi32splatNot() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ unsigned Value = CE->getValue();
+ return ARM_AM::isNEONi32splat(~Value);
+ }
+
+ bool isNEONByteReplicate(unsigned NumBytes) const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE)
+ return false;
+ int64_t Value = CE->getValue();
+ if (!Value)
+ return false; // Don't bother with zero.
+
+ unsigned char B = Value & 0xff;
+ for (unsigned i = 1; i < NumBytes; ++i) {
+ Value >>= 8;
+ if ((Value & 0xff) != B)
+ return false;
+ }
+ return true;
+ }
+ bool isNEONi16ByteReplicate() const { return isNEONByteReplicate(2); }
+ bool isNEONi32ByteReplicate() const { return isNEONByteReplicate(4); }
+ bool isNEONi32vmov() const {
+ if (isNEONByteReplicate(4))
+ return false; // Let it to be classified as byte-replicate case.
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE)
+ return false;
+ int64_t Value = CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
+ // for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ // FIXME: This is probably wrong and a copy and paste from previous example
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000) ||
+ (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) ||
+ (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff);
+ }
+ bool isNEONi32vmovNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = ~CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
+ // for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ // FIXME: This is probably wrong and a copy and paste from previous example
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000) ||
+ (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) ||
+ (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff);
+ }
+
+ bool isNEONi64splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ uint64_t Value = CE->getValue();
+ // i64 value with each byte being either 0 or 0xff.
+ for (unsigned i = 0; i < 8; ++i)
+ if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff) return false;
+ return true;
+ }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible. Null MCExpr = 0.
+ if (!Expr)
+ Inst.addOperand(MCOperand::createImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(unsigned(getCondCode())));
+ unsigned RegNum = getCondCode() == ARMCC::AL ? 0: ARM::CPSR;
+ Inst.addOperand(MCOperand::createReg(RegNum));
+ }
+
+ void addCoprocNumOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getCoproc()));
+ }
+
+ void addCoprocRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getCoproc()));
+ }
+
+ void addCoprocOptionOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(CoprocOption.Val));
+ }
+
+ void addITMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(ITMask.Mask));
+ }
+
+ void addITCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(unsigned(getCondCode())));
+ }
+
+ void addCCOutOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+
+ void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ assert(isRegShiftedReg() &&
+ "addRegShiftedRegOperands() on non-RegShiftedReg!");
+ Inst.addOperand(MCOperand::createReg(RegShiftedReg.SrcReg));
+ Inst.addOperand(MCOperand::createReg(RegShiftedReg.ShiftReg));
+ Inst.addOperand(MCOperand::createImm(
+ ARM_AM::getSORegOpc(RegShiftedReg.ShiftTy, RegShiftedReg.ShiftImm)));
+ }
+
+ void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ assert(isRegShiftedImm() &&
+ "addRegShiftedImmOperands() on non-RegShiftedImm!");
+ Inst.addOperand(MCOperand::createReg(RegShiftedImm.SrcReg));
+ // Shift of #32 is encoded as 0 where permitted
+ unsigned Imm = (RegShiftedImm.ShiftImm == 32 ? 0 : RegShiftedImm.ShiftImm);
+ Inst.addOperand(MCOperand::createImm(
+ ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, Imm)));
+ }
+
+ void addShifterImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm((ShifterImm.isASR << 5) |
+ ShifterImm.Imm));
+ }
+
+ void addRegListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ++I)
+ Inst.addOperand(MCOperand::createReg(*I));
+ }
+
+ void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addSPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addRotImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // Encoded as val>>3. The printer handles display as 8, 16, 24.
+ Inst.addOperand(MCOperand::createImm(RotImm.Imm >> 3));
+ }
+
+ void addModImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ // Support for fixups (MCFixup)
+ if (isImm())
+ return addImmOperands(Inst, N);
+
+ Inst.addOperand(MCOperand::createImm(ModImm.Bits | (ModImm.Rot << 7)));
+ }
+
+ void addModImmNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint32_t Enc = ARM_AM::getSOImmVal(~CE->getValue());
+ Inst.addOperand(MCOperand::createImm(Enc));
+ }
+
+ void addModImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint32_t Enc = ARM_AM::getSOImmVal(-CE->getValue());
+ Inst.addOperand(MCOperand::createImm(Enc));
+ }
+
+ void addBitfieldOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // Munge the lsb/width into a bitfield mask.
+ unsigned lsb = Bitfield.LSB;
+ unsigned width = Bitfield.Width;
+ // Make a 32-bit mask w/ the referenced bits clear and all other bits set.
+ uint32_t Mask = ~(((uint32_t)0xffffffff >> lsb) << (32 - width) >>
+ (32 - (lsb + width)));
+ Inst.addOperand(MCOperand::createImm(Mask));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addFBits16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(16 - CE->getValue()));
+ }
+
+ void addFBits32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(32 - CE->getValue()));
+ }
+
+ void addFPImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addImm8s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // FIXME: We really want to scale the value here, but the LDRD/STRD
+ // instruction don't encode operands that way yet.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ }
+
+ void addImm0_1020s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue() / 4));
+ }
+
+ void addImm0_508s4NegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(-(CE->getValue() / 4)));
+ }
+
+ void addImm0_508s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue() / 4));
+ }
+
+ void addImm1_16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate-1, and we store in the instruction
+ // the bits as encoded, so subtract off one here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue() - 1));
+ }
+
+ void addImm1_32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate-1, and we store in the instruction
+ // the bits as encoded, so subtract off one here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue() - 1));
+ }
+
+ void addImmThumbSROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate, except for 32, which encodes as
+ // zero.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Imm = CE->getValue();
+ Inst.addOperand(MCOperand::createImm((Imm == 32 ? 0 : Imm)));
+ }
+
+ void addPKHASRImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // An ASR value of 32 encodes as 0, so that's how we want to add it to
+ // the instruction as well.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = CE->getValue();
+ Inst.addOperand(MCOperand::createImm(Val == 32 ? 0 : Val));
+ }
+
+ void addT2SOImmNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a t2_so_imm, but we have its bitwise
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(~CE->getValue()));
+ }
+
+ void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a t2_so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(-CE->getValue()));
+ }
+
+ void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually an imm0_4095, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(-CE->getValue()));
+ }
+
+ void addUnsignedOffset_b8s2Operands(MCInst &Inst, unsigned N) const {
+ if(const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::createImm(CE->getValue() >> 2));
+ return;
+ }
+
+ const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val);
+ assert(SR && "Unknown value type!");
+ Inst.addOperand(MCOperand::createExpr(SR));
+ }
+
+ void addThumbMemPCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ if (isImm()) {
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (CE) {
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ return;
+ }
+
+ const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val);
+ assert(SR && "Unknown value type!");
+ Inst.addOperand(MCOperand::createExpr(SR));
+ return;
+ }
+
+ assert(isMem() && "Unknown value type!");
+ assert(isa<MCConstantExpr>(Memory.OffsetImm) && "Unknown value type!");
+ Inst.addOperand(MCOperand::createImm(Memory.OffsetImm->getValue()));
+ }
+
+ void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(unsigned(getMemBarrierOpt())));
+ }
+
+ void addInstSyncBarrierOptOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(unsigned(getInstSyncBarrierOpt())));
+ }
+
+ void addMemNoOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ }
+
+ void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ int32_t Imm = Memory.OffsetImm->getValue();
+ Inst.addOperand(MCOperand::createImm(Imm));
+ }
+
+ void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ assert(isImm() && "Not an immediate!");
+
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup.
+ if (!isa<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::createExpr(getImm()));
+ return;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = CE->getValue();
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Memory.Alignment));
+ }
+
+ void addDupAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory16Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addDupAlignedMemory16Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory32Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addDupAlignedMemory32Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory64Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addDupAlignedMemory64Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory64or128Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addDupAlignedMemory64or128Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory64or128or256Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAddrMode2Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ if (!Memory.OffsetRegNum) {
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift);
+ } else {
+ // For register offset, we encode the shift type and negation flag
+ // here.
+ Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
+ }
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addAM2OffsetImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant AM2OffsetImm operand!");
+ int32_t Val = CE->getValue();
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift);
+ Inst.addOperand(MCOperand::createReg(0));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addAddrMode3Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::createExpr(getImm()));
+ Inst.addOperand(MCOperand::createReg(0));
+ Inst.addOperand(MCOperand::createImm(0));
+ return;
+ }
+
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ if (!Memory.OffsetRegNum) {
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM3Opc(AddSub, Val);
+ } else {
+ // For register offset, we encode the shift type and negation flag
+ // here.
+ Val = ARM_AM::getAM3Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, 0);
+ }
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addAM3OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ if (Kind == k_PostIndexRegister) {
+ int32_t Val =
+ ARM_AM::getAM3Opc(PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub, 0);
+ Inst.addOperand(MCOperand::createReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ return;
+ }
+
+ // Constant offset.
+ const MCConstantExpr *CE = static_cast<const MCConstantExpr*>(getImm());
+ int32_t Val = CE->getValue();
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM3Opc(AddSub, Val);
+ Inst.addOperand(MCOperand::createReg(0));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addAddrMode5Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::createExpr(getImm()));
+ Inst.addOperand(MCOperand::createImm(0));
+ return;
+ }
+
+ // The lower two bits are always zero and as such are not encoded.
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM5Opc(AddSub, Val);
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::createExpr(getImm()));
+ Inst.addOperand(MCOperand::createImm(0));
+ return;
+ }
+
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addMemImm0_1020s4OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // The lower two bits are always zero and as such are not encoded.
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addMemImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addMemPosImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ addMemImm8OffsetOperands(Inst, N);
+ }
+
+ void addMemNegImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ addMemImm8OffsetOperands(Inst, N);
+ }
+
+ void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If this is an immediate, it's a label reference.
+ if (isImm()) {
+ addExpr(Inst, getImm());
+ Inst.addOperand(MCOperand::createImm(0));
+ return;
+ }
+
+ // Otherwise, it's a normal memory reg+offset.
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If this is an immediate, it's a label reference.
+ if (isImm()) {
+ addExpr(Inst, getImm());
+ Inst.addOperand(MCOperand::createImm(0));
+ return;
+ }
+
+ // Otherwise, it's a normal memory reg+offset.
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addMemTBBOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
+ }
+
+ void addMemTBHOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
+ }
+
+ void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ unsigned Val =
+ ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addT2MemRegOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::createImm(Memory.ShiftImm));
+ }
+
+ void addMemThumbRROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
+ }
+
+ void addMemThumbRIs4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0;
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addMemThumbRIs2Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 2) : 0;
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addMemThumbRIs1Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue()) : 0;
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addMemThumbSPIOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0;
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addPostIdxImm8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant post-idx-imm8 operand!");
+ int Imm = CE->getValue();
+ bool isAdd = Imm >= 0;
+ if (Imm == INT32_MIN) Imm = 0;
+ Imm = (Imm < 0 ? -Imm : Imm) | (int)isAdd << 8;
+ Inst.addOperand(MCOperand::createImm(Imm));
+ }
+
+ void addPostIdxImm8s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant post-idx-imm8s4 operand!");
+ int Imm = CE->getValue();
+ bool isAdd = Imm >= 0;
+ if (Imm == INT32_MIN) Imm = 0;
+ // Immediate is scaled by 4.
+ Imm = ((Imm < 0 ? -Imm : Imm) / 4) | (int)isAdd << 8;
+ Inst.addOperand(MCOperand::createImm(Imm));
+ }
+
+ void addPostIdxRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::createImm(PostIdxReg.isAdd));
+ }
+
+ void addPostIdxRegShiftedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(PostIdxReg.RegNum));
+ // The sign, shift type, and shift amount are encoded in a single operand
+ // using the AM2 encoding helpers.
+ ARM_AM::AddrOpc opc = PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub;
+ unsigned Imm = ARM_AM::getAM2Opc(opc, PostIdxReg.ShiftImm,
+ PostIdxReg.ShiftTy);
+ Inst.addOperand(MCOperand::createImm(Imm));
+ }
+
+ void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(unsigned(getMSRMask())));
+ }
+
+ void addBankedRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(unsigned(getBankedReg())));
+ }
+
+ void addProcIFlagsOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(unsigned(getProcIFlags())));
+ }
+
+ void addVecListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(VectorList.RegNum));
+ }
+
+ void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(VectorList.RegNum));
+ Inst.addOperand(MCOperand::createImm(VectorList.LaneIndex));
+ }
+
+ void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
+ }
+
+ void addVectorIndex16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
+ }
+
+ void addVectorIndex32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
+ }
+
+ void addNEONi8splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ // Mask in that this is an i8 splat.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue() | 0xe00));
+ }
+
+ void addNEONi16splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ Value = ARM_AM::encodeNEONi16splat(Value);
+ Inst.addOperand(MCOperand::createImm(Value));
+ }
+
+ void addNEONi16splatNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ Value = ARM_AM::encodeNEONi16splat(~Value & 0xffff);
+ Inst.addOperand(MCOperand::createImm(Value));
+ }
+
+ void addNEONi32splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ Value = ARM_AM::encodeNEONi32splat(Value);
+ Inst.addOperand(MCOperand::createImm(Value));
+ }
+
+ void addNEONi32splatNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ Value = ARM_AM::encodeNEONi32splat(~Value);
+ Inst.addOperand(MCOperand::createImm(Value));
+ }
+
+ void addNEONinvByteReplicateOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ assert((Inst.getOpcode() == ARM::VMOVv8i8 ||
+ Inst.getOpcode() == ARM::VMOVv16i8) &&
+ "All vmvn instructions that wants to replicate non-zero byte "
+ "always must be replaced with VMOVv8i8 or VMOVv16i8.");
+ unsigned B = ((~Value) & 0xff);
+ B |= 0xe00; // cmode = 0b1110
+ Inst.addOperand(MCOperand::createImm(B));
+ }
+ void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256 && Value <= 0xffff)
+ Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200);
+ else if (Value > 0xffff && Value <= 0xffffff)
+ Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::createImm(Value));
+ }
+
+ void addNEONvmovByteReplicateOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ assert((Inst.getOpcode() == ARM::VMOVv8i8 ||
+ Inst.getOpcode() == ARM::VMOVv16i8) &&
+ "All instructions that wants to replicate non-zero byte "
+ "always must be replaced with VMOVv8i8 or VMOVv16i8.");
+ unsigned B = Value & 0xff;
+ B |= 0xe00; // cmode = 0b1110
+ Inst.addOperand(MCOperand::createImm(B));
+ }
+ void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = ~CE->getValue();
+ if (Value >= 256 && Value <= 0xffff)
+ Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200);
+ else if (Value > 0xffff && Value <= 0xffffff)
+ Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::createImm(Value));
+ }
+
+ void addNEONi64splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint64_t Value = CE->getValue();
+ unsigned Imm = 0;
+ for (unsigned i = 0; i < 8; ++i, Value >>= 8) {
+ Imm |= (Value & 1) << i;
+ }
+ Inst.addOperand(MCOperand::createImm(Imm | 0x1e00));
+ }
+
+ void print(raw_ostream &OS) const override;
+
+ static std::unique_ptr<ARMOperand> CreateITMask(unsigned Mask, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_ITCondMask);
+ Op->ITMask.Mask = Mask;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateCondCode(ARMCC::CondCodes CC,
+ SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_CondCode);
+ Op->CC.Val = CC;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateCoprocNum(unsigned CopVal, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_CoprocNum);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateCoprocReg(unsigned CopVal, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_CoprocReg);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateCoprocOption(unsigned Val, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_CoprocOption);
+ Op->Cop.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateCCOut(unsigned RegNum, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_CCOut);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateToken(StringRef Str, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand>
+ CreateShiftedRegister(ARM_AM::ShiftOpc ShTy, unsigned SrcReg,
+ unsigned ShiftReg, unsigned ShiftImm, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ShiftedRegister);
+ Op->RegShiftedReg.ShiftTy = ShTy;
+ Op->RegShiftedReg.SrcReg = SrcReg;
+ Op->RegShiftedReg.ShiftReg = ShiftReg;
+ Op->RegShiftedReg.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand>
+ CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, unsigned SrcReg,
+ unsigned ShiftImm, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ShiftedImmediate);
+ Op->RegShiftedImm.ShiftTy = ShTy;
+ Op->RegShiftedImm.SrcReg = SrcReg;
+ Op->RegShiftedImm.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateShifterImm(bool isASR, unsigned Imm,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ShifterImmediate);
+ Op->ShifterImm.isASR = isASR;
+ Op->ShifterImm.Imm = Imm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateRotImm(unsigned Imm, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_RotateImmediate);
+ Op->RotImm.Imm = Imm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateModImm(unsigned Bits, unsigned Rot,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ModifiedImmediate);
+ Op->ModImm.Bits = Bits;
+ Op->ModImm.Rot = Rot;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand>
+ CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor);
+ Op->Bitfield.LSB = LSB;
+ Op->Bitfield.Width = Width;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand>
+ CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ assert (Regs.size() > 0 && "RegList contains no registers?");
+ KindTy Kind = k_RegisterList;
+
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().second))
+ Kind = k_DPRRegisterList;
+ else if (ARMMCRegisterClasses[ARM::SPRRegClassID].
+ contains(Regs.front().second))
+ Kind = k_SPRRegisterList;
+
+ // Sort based on the register encoding values.
+ array_pod_sort(Regs.begin(), Regs.end());
+
+ auto Op = make_unique<ARMOperand>(Kind);
+ for (SmallVectorImpl<std::pair<unsigned, unsigned> >::const_iterator
+ I = Regs.begin(), E = Regs.end(); I != E; ++I)
+ Op->Registers.push_back(I->second);
+ Op->StartLoc = StartLoc;
+ Op->EndLoc = EndLoc;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateVectorList(unsigned RegNum,
+ unsigned Count,
+ bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_VectorList);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand>
+ CreateVectorListAllLanes(unsigned RegNum, unsigned Count, bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_VectorListAllLanes);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand>
+ CreateVectorListIndexed(unsigned RegNum, unsigned Count, unsigned Index,
+ bool isDoubleSpaced, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_VectorListIndexed);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.LaneIndex = Index;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand>
+ CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<ARMOperand>(k_VectorIndex);
+ Op->VectorIndex.Val = Idx;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand>
+ CreateMem(unsigned BaseRegNum, const MCConstantExpr *OffsetImm,
+ unsigned OffsetRegNum, ARM_AM::ShiftOpc ShiftType,
+ unsigned ShiftImm, unsigned Alignment, bool isNegative, SMLoc S,
+ SMLoc E, SMLoc AlignmentLoc = SMLoc()) {
+ auto Op = make_unique<ARMOperand>(k_Memory);
+ Op->Memory.BaseRegNum = BaseRegNum;
+ Op->Memory.OffsetImm = OffsetImm;
+ Op->Memory.OffsetRegNum = OffsetRegNum;
+ Op->Memory.ShiftType = ShiftType;
+ Op->Memory.ShiftImm = ShiftImm;
+ Op->Memory.Alignment = Alignment;
+ Op->Memory.isNegative = isNegative;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->AlignmentLoc = AlignmentLoc;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand>
+ CreatePostIdxReg(unsigned RegNum, bool isAdd, ARM_AM::ShiftOpc ShiftTy,
+ unsigned ShiftImm, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_PostIndexRegister);
+ Op->PostIdxReg.RegNum = RegNum;
+ Op->PostIdxReg.isAdd = isAdd;
+ Op->PostIdxReg.ShiftTy = ShiftTy;
+ Op->PostIdxReg.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateMemBarrierOpt(ARM_MB::MemBOpt Opt,
+ SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_MemBarrierOpt);
+ Op->MBOpt.Val = Opt;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand>
+ CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_InstSyncBarrierOpt);
+ Op->ISBOpt.Val = Opt;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateProcIFlags(ARM_PROC::IFlags IFlags,
+ SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_ProcIFlags);
+ Op->IFlags.Val = IFlags;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateMSRMask(unsigned MMask, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_MSRMask);
+ Op->MMask.Val = MMask;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand> CreateBankedReg(unsigned Reg, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_BankedReg);
+ Op->BankedReg.Val = Reg;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+};
+
+} // end anonymous namespace.
+
+void ARMOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case k_CondCode:
+ OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
+ break;
+ case k_CCOut:
+ OS << "<ccout " << getReg() << ">";
+ break;
+ case k_ITCondMask: {
+ static const char *const MaskStr[] = {
+ "()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)",
+ "(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)"
+ };
+ assert((ITMask.Mask & 0xf) == ITMask.Mask);
+ OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
+ break;
+ }
+ case k_CoprocNum:
+ OS << "<coprocessor number: " << getCoproc() << ">";
+ break;
+ case k_CoprocReg:
+ OS << "<coprocessor register: " << getCoproc() << ">";
+ break;
+ case k_CoprocOption:
+ OS << "<coprocessor option: " << CoprocOption.Val << ">";
+ break;
+ case k_MSRMask:
+ OS << "<mask: " << getMSRMask() << ">";
+ break;
+ case k_BankedReg:
+ OS << "<banked reg: " << getBankedReg() << ">";
+ break;
+ case k_Immediate:
+ OS << *getImm();
+ break;
+ case k_MemBarrierOpt:
+ OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt(), false) << ">";
+ break;
+ case k_InstSyncBarrierOpt:
+ OS << "<ARM_ISB::" << InstSyncBOptToString(getInstSyncBarrierOpt()) << ">";
+ break;
+ case k_Memory:
+ OS << "<memory "
+ << " base:" << Memory.BaseRegNum;
+ OS << ">";
+ break;
+ case k_PostIndexRegister:
+ OS << "post-idx register " << (PostIdxReg.isAdd ? "" : "-")
+ << PostIdxReg.RegNum;
+ if (PostIdxReg.ShiftTy != ARM_AM::no_shift)
+ OS << ARM_AM::getShiftOpcStr(PostIdxReg.ShiftTy) << " "
+ << PostIdxReg.ShiftImm;
+ OS << ">";
+ break;
+ case k_ProcIFlags: {
+ OS << "<ARM_PROC::";
+ unsigned IFlags = getProcIFlags();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ OS << ARM_PROC::IFlagsToString(1 << i);
+ OS << ">";
+ break;
+ }
+ case k_Register:
+ OS << "<register " << getReg() << ">";
+ break;
+ case k_ShifterImmediate:
+ OS << "<shift " << (ShifterImm.isASR ? "asr" : "lsl")
+ << " #" << ShifterImm.Imm << ">";
+ break;
+ case k_ShiftedRegister:
+ OS << "<so_reg_reg "
+ << RegShiftedReg.SrcReg << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy)
+ << " " << RegShiftedReg.ShiftReg << ">";
+ break;
+ case k_ShiftedImmediate:
+ OS << "<so_reg_imm "
+ << RegShiftedImm.SrcReg << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy)
+ << " #" << RegShiftedImm.ShiftImm << ">";
+ break;
+ case k_RotateImmediate:
+ OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
+ break;
+ case k_ModifiedImmediate:
+ OS << "<mod_imm #" << ModImm.Bits << ", #"
+ << ModImm.Rot << ")>";
+ break;
+ case k_BitfieldDescriptor:
+ OS << "<bitfield " << "lsb: " << Bitfield.LSB
+ << ", width: " << Bitfield.Width << ">";
+ break;
+ case k_RegisterList:
+ case k_DPRRegisterList:
+ case k_SPRRegisterList: {
+ OS << "<register_list ";
+
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ) {
+ OS << *I;
+ if (++I < E) OS << ", ";
+ }
+
+ OS << ">";
+ break;
+ }
+ case k_VectorList:
+ OS << "<vector_list " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListAllLanes:
+ OS << "<vector_list(all lanes) " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListIndexed:
+ OS << "<vector_list(lane " << VectorList.LaneIndex << ") "
+ << VectorList.Count << " * " << VectorList.RegNum << ">";
+ break;
+ case k_Token:
+ OS << "'" << getToken() << "'";
+ break;
+ case k_VectorIndex:
+ OS << "<vectorindex " << getVectorIndex() << ">";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+bool ARMAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ const AsmToken &Tok = getParser().getTok();
+ StartLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
+ RegNo = tryParseRegister();
+
+ return (RegNo == (unsigned)-1);
+}
+
+/// Try to parse a register name. The token must be an Identifier when called,
+/// and if it is a register name the token is eaten and the register number is
+/// returned. Otherwise return -1.
+///
+int ARMAsmParser::tryParseRegister() {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) return -1;
+
+ std::string lowerCase = Tok.getString().lower();
+ unsigned RegNum = MatchRegisterName(lowerCase);
+ if (!RegNum) {
+ RegNum = StringSwitch<unsigned>(lowerCase)
+ .Case("r13", ARM::SP)
+ .Case("r14", ARM::LR)
+ .Case("r15", ARM::PC)
+ .Case("ip", ARM::R12)
+ // Additional register name aliases for 'gas' compatibility.
+ .Case("a1", ARM::R0)
+ .Case("a2", ARM::R1)
+ .Case("a3", ARM::R2)
+ .Case("a4", ARM::R3)
+ .Case("v1", ARM::R4)
+ .Case("v2", ARM::R5)
+ .Case("v3", ARM::R6)
+ .Case("v4", ARM::R7)
+ .Case("v5", ARM::R8)
+ .Case("v6", ARM::R9)
+ .Case("v7", ARM::R10)
+ .Case("v8", ARM::R11)
+ .Case("sb", ARM::R9)
+ .Case("sl", ARM::R10)
+ .Case("fp", ARM::R11)
+ .Default(0);
+ }
+ if (!RegNum) {
+ // Check for aliases registered via .req. Canonicalize to lower case.
+ // That's more consistent since register names are case insensitive, and
+ // it's how the original entry was passed in from MC/MCParser/AsmParser.
+ StringMap<unsigned>::const_iterator Entry = RegisterReqs.find(lowerCase);
+ // If no match, return failure.
+ if (Entry == RegisterReqs.end())
+ return -1;
+ Parser.Lex(); // Eat identifier token.
+ return Entry->getValue();
+ }
+
+ // Some FPUs only have 16 D registers, so D16-D31 are invalid
+ if (hasD16() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
+ return -1;
+
+ Parser.Lex(); // Eat identifier token.
+
+ return RegNum;
+}
+
+// Try to parse a shifter (e.g., "lsl <amt>"). On success, return 0.
+// If a recoverable error occurs, return 1. If an irrecoverable error
+// occurs, return -1. An irrecoverable error is one where tokens have been
+// consumed in the process of trying to parse the shifter (i.e., when it is
+// indeed a shifter operand, but malformed).
+int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return -1;
+
+ std::string lowerCase = Tok.getString().lower();
+ ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("asl", ARM_AM::lsl)
+ .Case("lsl", ARM_AM::lsl)
+ .Case("lsr", ARM_AM::lsr)
+ .Case("asr", ARM_AM::asr)
+ .Case("ror", ARM_AM::ror)
+ .Case("rrx", ARM_AM::rrx)
+ .Default(ARM_AM::no_shift);
+
+ if (ShiftTy == ARM_AM::no_shift)
+ return 1;
+
+ Parser.Lex(); // Eat the operator.
+
+ // The source register for the shift has already been added to the
+ // operand list, so we need to pop it off and combine it into the shifted
+ // register operand instead.
+ std::unique_ptr<ARMOperand> PrevOp(
+ (ARMOperand *)Operands.pop_back_val().release());
+ if (!PrevOp->isReg())
+ return Error(PrevOp->getStartLoc(), "shift must be of a register");
+ int SrcReg = PrevOp->getReg();
+
+ SMLoc EndLoc;
+ int64_t Imm = 0;
+ int ShiftReg = 0;
+ if (ShiftTy == ARM_AM::rrx) {
+ // RRX Doesn't have an explicit shift amount. The encoder expects
+ // the shift register to be the same as the source register. Seems odd,
+ // but OK.
+ ShiftReg = SrcReg;
+ } else {
+ // Figure out if this is shifted by a constant or a register (for non-RRX).
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
+ Parser.Lex(); // Eat hash.
+ SMLoc ImmLoc = Parser.getTok().getLoc();
+ const MCExpr *ShiftExpr = nullptr;
+ if (getParser().parseExpression(ShiftExpr, EndLoc)) {
+ Error(ImmLoc, "invalid immediate shift value");
+ return -1;
+ }
+ // The expression must be evaluatable as an immediate.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftExpr);
+ if (!CE) {
+ Error(ImmLoc, "invalid immediate shift value");
+ return -1;
+ }
+ // Range check the immediate.
+ // lsl, ror: 0 <= imm <= 31
+ // lsr, asr: 0 <= imm <= 32
+ Imm = CE->getValue();
+ if (Imm < 0 ||
+ ((ShiftTy == ARM_AM::lsl || ShiftTy == ARM_AM::ror) && Imm > 31) ||
+ ((ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr) && Imm > 32)) {
+ Error(ImmLoc, "immediate shift value out of range");
+ return -1;
+ }
+ // shift by zero is a nop. Always send it through as lsl.
+ // ('as' compatibility)
+ if (Imm == 0)
+ ShiftTy = ARM_AM::lsl;
+ } else if (Parser.getTok().is(AsmToken::Identifier)) {
+ SMLoc L = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
+ ShiftReg = tryParseRegister();
+ if (ShiftReg == -1) {
+ Error(L, "expected immediate or register in shift operand");
+ return -1;
+ }
+ } else {
+ Error(Parser.getTok().getLoc(),
+ "expected immediate or register in shift operand");
+ return -1;
+ }
+ }
+
+ if (ShiftReg && ShiftTy != ARM_AM::rrx)
+ Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
+ ShiftReg, Imm,
+ S, EndLoc));
+ else
+ Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm,
+ S, EndLoc));
+
+ return 0;
+}
+
+
+/// Try to parse a register name. The token must be an Identifier when called.
+/// If it's a register, an AsmOperand is created. Another AsmOperand is created
+/// if there is a "writeback". 'true' if it's not a register.
+///
+/// TODO this is likely to change to allow different register types and or to
+/// parse for a specific register type.
+bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &RegTok = Parser.getTok();
+ int RegNo = tryParseRegister();
+ if (RegNo == -1)
+ return true;
+
+ Operands.push_back(ARMOperand::CreateReg(RegNo, RegTok.getLoc(),
+ RegTok.getEndLoc()));
+
+ const AsmToken &ExclaimTok = Parser.getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(),
+ ExclaimTok.getLoc()));
+ Parser.Lex(); // Eat exclaim token
+ return false;
+ }
+
+ // Also check for an index operand. This is only legal for vector registers,
+ // but that'll get caught OK in operand matching, so we don't need to
+ // explicitly filter everything else out here.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ SMLoc SIdx = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const MCExpr *ImmVal;
+ if (getParser().parseExpression(ImmVal))
+ return true;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE)
+ return TokError("immediate value expected for vector index");
+
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(Parser.getTok().getLoc(), "']' expected");
+
+ SMLoc E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
+ SIdx, E,
+ getContext()));
+ }
+
+ return false;
+}
+
+/// MatchCoprocessorOperandName - Try to parse an coprocessor related
+/// instruction with a symbolic operand name.
+/// We accept "crN" syntax for GAS compatibility.
+/// <operand-name> ::= <prefix><number>
+/// If CoprocOp is 'c', then:
+/// <prefix> ::= c | cr
+/// If CoprocOp is 'p', then :
+/// <prefix> ::= p
+/// <number> ::= integer in range [0, 15]
+static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
+ // Use the same layout as the tablegen'erated register name matcher. Ugly,
+ // but efficient.
+ if (Name.size() < 2 || Name[0] != CoprocOp)
+ return -1;
+ Name = (Name[1] == 'r') ? Name.drop_front(2) : Name.drop_front();
+
+ switch (Name.size()) {
+ default: return -1;
+ case 1:
+ switch (Name[0]) {
+ default: return -1;
+ case '0': return 0;
+ case '1': return 1;
+ case '2': return 2;
+ case '3': return 3;
+ case '4': return 4;
+ case '5': return 5;
+ case '6': return 6;
+ case '7': return 7;
+ case '8': return 8;
+ case '9': return 9;
+ }
+ case 2:
+ if (Name[0] != '1')
+ return -1;
+ switch (Name[1]) {
+ default: return -1;
+ // CP10 and CP11 are VFP/NEON and so vector instructions should be used.
+ // However, old cores (v5/v6) did use them in that way.
+ case '0': return 10;
+ case '1': return 11;
+ case '2': return 12;
+ case '3': return 13;
+ case '4': return 14;
+ case '5': return 15;
+ }
+ }
+}
+
+/// parseITCondCode - Try to parse a condition code for an IT instruction.
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseITCondCode(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ unsigned CC = StringSwitch<unsigned>(Tok.getString().lower())
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("cs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("cc", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC == ~0U)
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the token.
+
+ Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), S));
+
+ return MatchOperand_Success;
+}
+
+/// parseCoprocNumOperand - Try to parse an coprocessor number operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+
+ int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
+ if (Num == -1)
+ return MatchOperand_NoMatch;
+ // ARMv7 and v8 don't allow cp10/cp11 due to VFP/NEON specific instructions
+ if ((hasV7Ops() || hasV8Ops()) && (Num == 10 || Num == 11))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
+ return MatchOperand_Success;
+}
+
+/// parseCoprocRegOperand - Try to parse an coprocessor register operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+
+ int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocReg(Reg, S));
+ return MatchOperand_Success;
+}
+
+/// parseCoprocOptionOperand - Try to parse an coprocessor option operand.
+/// coproc_option : '{' imm0_255 '}'
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+
+ // If this isn't a '{', this isn't a coprocessor immediate operand.
+ if (Parser.getTok().isNot(AsmToken::LCurly))
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the '{'
+
+ const MCExpr *Expr;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().parseExpression(Expr)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE || CE->getValue() < 0 || CE->getValue() > 255) {
+ Error(Loc, "coprocessor option must be an immediate in range [0, 255]");
+ return MatchOperand_ParseFail;
+ }
+ int Val = CE->getValue();
+
+ // Check for and consume the closing '}'
+ if (Parser.getTok().isNot(AsmToken::RCurly))
+ return MatchOperand_ParseFail;
+ SMLoc E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat the '}'
+
+ Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E));
+ return MatchOperand_Success;
+}
+
+// For register list parsing, we need to map from raw GPR register numbering
+// to the enumeration values. The enumeration values aren't sorted by
+// register number due to our using "sp", "lr" and "pc" as canonical names.
+static unsigned getNextRegister(unsigned Reg) {
+ // If this is a GPR, we need to do it manually, otherwise we can rely
+ // on the sort ordering of the enumeration since the other reg-classes
+ // are sane.
+ if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ return Reg + 1;
+ switch(Reg) {
+ default: llvm_unreachable("Invalid GPR number!");
+ case ARM::R0: return ARM::R1; case ARM::R1: return ARM::R2;
+ case ARM::R2: return ARM::R3; case ARM::R3: return ARM::R4;
+ case ARM::R4: return ARM::R5; case ARM::R5: return ARM::R6;
+ case ARM::R6: return ARM::R7; case ARM::R7: return ARM::R8;
+ case ARM::R8: return ARM::R9; case ARM::R9: return ARM::R10;
+ case ARM::R10: return ARM::R11; case ARM::R11: return ARM::R12;
+ case ARM::R12: return ARM::SP; case ARM::SP: return ARM::LR;
+ case ARM::LR: return ARM::PC; case ARM::PC: return ARM::R0;
+ }
+}
+
+// Return the low-subreg of a given Q register.
+static unsigned getDRegFromQReg(unsigned QReg) {
+ switch (QReg) {
+ default: llvm_unreachable("expected a Q register!");
+ case ARM::Q0: return ARM::D0;
+ case ARM::Q1: return ARM::D2;
+ case ARM::Q2: return ARM::D4;
+ case ARM::Q3: return ARM::D6;
+ case ARM::Q4: return ARM::D8;
+ case ARM::Q5: return ARM::D10;
+ case ARM::Q6: return ARM::D12;
+ case ARM::Q7: return ARM::D14;
+ case ARM::Q8: return ARM::D16;
+ case ARM::Q9: return ARM::D18;
+ case ARM::Q10: return ARM::D20;
+ case ARM::Q11: return ARM::D22;
+ case ARM::Q12: return ARM::D24;
+ case ARM::Q13: return ARM::D26;
+ case ARM::Q14: return ARM::D28;
+ case ARM::Q15: return ARM::D30;
+ }
+}
+
+/// Parse a register list.
+bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ assert(Parser.getTok().is(AsmToken::LCurly) &&
+ "Token is not a Left Curly Brace");
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '{' token.
+ SMLoc RegLoc = Parser.getTok().getLoc();
+
+ // Check the first register in the list to see what register class
+ // this is a list of.
+ int Reg = tryParseRegister();
+ if (Reg == -1)
+ return Error(RegLoc, "register expected");
+
+ // The reglist instructions have at most 16 registers, so reserve
+ // space for that many.
+ int EReg = 0;
+ SmallVector<std::pair<unsigned, unsigned>, 16> Registers;
+
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ EReg = MRI->getEncodingValue(Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ ++Reg;
+ }
+ const MCRegisterClass *RC;
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::GPRRegClassID];
+ else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::DPRRegClassID];
+ else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::SPRRegClassID];
+ else
+ return Error(RegLoc, "invalid register in register list");
+
+ // Store the register.
+ EReg = MRI->getEncodingValue(Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+
+ // This starts immediately after the first register token in the list,
+ // so we can see either a comma or a minus (range separator) as a legal
+ // next token.
+ while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus)) {
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the minus.
+ SMLoc AfterMinusLoc = Parser.getTok().getLoc();
+ int EndReg = tryParseRegister();
+ if (EndReg == -1)
+ return Error(AfterMinusLoc, "register expected");
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+ EndReg = getDRegFromQReg(EndReg) + 1;
+ // If the register is the same as the start reg, there's nothing
+ // more to do.
+ if (Reg == EndReg)
+ continue;
+ // The register must be in the same register class as the first.
+ if (!RC->contains(EndReg))
+ return Error(AfterMinusLoc, "invalid register in register list");
+ // Ranges must go from low to high.
+ if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
+ return Error(AfterMinusLoc, "bad range in register list");
+
+ // Add all the registers in the range to the register list.
+ while (Reg != EndReg) {
+ Reg = getNextRegister(Reg);
+ EReg = MRI->getEncodingValue(Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ }
+ continue;
+ }
+ Parser.Lex(); // Eat the comma.
+ RegLoc = Parser.getTok().getLoc();
+ int OldReg = Reg;
+ const AsmToken RegTok = Parser.getTok();
+ Reg = tryParseRegister();
+ if (Reg == -1)
+ return Error(RegLoc, "register expected");
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ bool isQReg = false;
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ isQReg = true;
+ }
+ // The register must be in the same register class as the first.
+ if (!RC->contains(Reg))
+ return Error(RegLoc, "invalid register in register list");
+ // List must be monotonically increasing.
+ if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ Warning(RegLoc, "register list not in ascending order");
+ else
+ return Error(RegLoc, "register list not in ascending order");
+ }
+ if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) {
+ Warning(RegLoc, "duplicated register (" + RegTok.getString() +
+ ") in register list");
+ continue;
+ }
+ // VFP register lists must also be contiguous.
+ if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] &&
+ Reg != OldReg + 1)
+ return Error(RegLoc, "non-contiguous register range");
+ EReg = MRI->getEncodingValue(Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ if (isQReg) {
+ EReg = MRI->getEncodingValue(++Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ }
+ }
+
+ if (Parser.getTok().isNot(AsmToken::RCurly))
+ return Error(Parser.getTok().getLoc(), "'}' expected");
+ SMLoc E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat '}' token.
+
+ // Push the register list operand.
+ Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+
+ // The ARM system instruction variants for LDM/STM have a '^' token here.
+ if (Parser.getTok().is(AsmToken::Caret)) {
+ Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat '^' token.
+ }
+
+ return false;
+}
+
+// Helper function to parse the lane index for vector lists.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
+ MCAsmParser &Parser = getParser();
+ Index = 0; // Always return a defined index value.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ Parser.Lex(); // Eat the '['.
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // "Dn[]" is the 'all lanes' syntax.
+ LaneKind = AllLanes;
+ EndLoc = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat the ']'.
+ return MatchOperand_Success;
+ }
+
+ // There's an optional '#' token here. Normally there wouldn't be, but
+ // inline assemble puts one in, and it's friendly to accept that.
+ if (Parser.getTok().is(AsmToken::Hash))
+ Parser.Lex(); // Eat '#' or '$'.
+
+ const MCExpr *LaneIndex;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().parseExpression(LaneIndex)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LaneIndex);
+ if (!CE) {
+ Error(Loc, "lane index must be empty or an integer");
+ return MatchOperand_ParseFail;
+ }
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(Parser.getTok().getLoc(), "']' expected");
+ return MatchOperand_ParseFail;
+ }
+ EndLoc = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat the ']'.
+ int64_t Val = CE->getValue();
+
+ // FIXME: Make this range check context sensitive for .8, .16, .32.
+ if (Val < 0 || Val > 7) {
+ Error(Parser.getTok().getLoc(), "lane index out of range");
+ return MatchOperand_ParseFail;
+ }
+ Index = Val;
+ LaneKind = IndexedLane;
+ return MatchOperand_Success;
+ }
+ LaneKind = NoLanes;
+ return MatchOperand_Success;
+}
+
+// parse a vector register list
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseVectorList(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ VectorLaneTy LaneKind;
+ unsigned LaneIndex;
+ SMLoc S = Parser.getTok().getLoc();
+ // As an extension (to match gas), support a plain D register or Q register
+ // (without encosing curly braces) as a single or double entry list,
+ // respectively.
+ if (Parser.getTok().is(AsmToken::Identifier)) {
+ SMLoc E = Parser.getTok().getEndLoc();
+ int Reg = tryParseRegister();
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ case NoLanes:
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
+ break;
+ case AllLanes:
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1,
+ LaneIndex,
+ false, S, E));
+ break;
+ }
+ return MatchOperand_Success;
+ }
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ case NoLanes:
+ Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
+ break;
+ case AllLanes:
+ Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2,
+ LaneIndex,
+ false, S, E));
+ break;
+ }
+ return MatchOperand_Success;
+ }
+ Error(S, "vector register expected");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Parser.getTok().isNot(AsmToken::LCurly))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat '{' token.
+ SMLoc RegLoc = Parser.getTok().getLoc();
+
+ int Reg = tryParseRegister();
+ if (Reg == -1) {
+ Error(RegLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ unsigned Count = 1;
+ int Spacing = 0;
+ unsigned FirstReg = Reg;
+ // The list is of D registers, but we also allow Q regs and just interpret
+ // them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ FirstReg = Reg = getDRegFromQReg(Reg);
+ Spacing = 1; // double-spacing requires explicit D registers, otherwise
+ // it's ambiguous with four-register single spaced.
+ ++Reg;
+ ++Count;
+ }
+
+ SMLoc E;
+ if (parseVectorLane(LaneKind, LaneIndex, E) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus)) {
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(Parser.getTok().getLoc(),
+ "sequential registers in double spaced list");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the minus.
+ SMLoc AfterMinusLoc = Parser.getTok().getLoc();
+ int EndReg = tryParseRegister();
+ if (EndReg == -1) {
+ Error(AfterMinusLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+ EndReg = getDRegFromQReg(EndReg) + 1;
+ // If the register is the same as the start reg, there's nothing
+ // more to do.
+ if (Reg == EndReg)
+ continue;
+ // The register must be in the same register class as the first.
+ if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
+ Error(AfterMinusLoc, "invalid register in register list");
+ return MatchOperand_ParseFail;
+ }
+ // Ranges must go from low to high.
+ if (Reg > EndReg) {
+ Error(AfterMinusLoc, "bad range in register list");
+ return MatchOperand_ParseFail;
+ }
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
+ MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(AfterMinusLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+
+ // Add all the registers in the range to the register list.
+ Count += EndReg - Reg;
+ Reg = EndReg;
+ continue;
+ }
+ Parser.Lex(); // Eat the comma.
+ RegLoc = Parser.getTok().getLoc();
+ int OldReg = Reg;
+ Reg = tryParseRegister();
+ if (Reg == -1) {
+ Error(RegLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ // vector register lists must be contiguous.
+ // It's OK to use the enumeration values directly here rather, as the
+ // VFP register classes have the enum sorted properly.
+ //
+ // The list is of D registers, but we also allow Q regs and just interpret
+ // them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(RegLoc,
+ "invalid register in double-spaced list (must be 'D' register')");
+ return MatchOperand_ParseFail;
+ }
+ Reg = getDRegFromQReg(Reg);
+ if (Reg != OldReg + 1) {
+ Error(RegLoc, "non-contiguous register range");
+ return MatchOperand_ParseFail;
+ }
+ ++Reg;
+ Count += 2;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc LaneLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
+ MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(LaneLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ continue;
+ }
+ // Normal D register.
+ // Figure out the register spacing (single or double) of the list if
+ // we don't know it already.
+ if (!Spacing)
+ Spacing = 1 + (Reg == OldReg + 2);
+
+ // Just check that it's contiguous and keep going.
+ if (Reg != OldReg + Spacing) {
+ Error(RegLoc, "non-contiguous register range");
+ return MatchOperand_ParseFail;
+ }
+ ++Count;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ if (Parser.getTok().isNot(AsmToken::RCurly)) {
+ Error(Parser.getTok().getLoc(), "'}' expected");
+ return MatchOperand_ParseFail;
+ }
+ E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat '}' token.
+
+ switch (LaneKind) {
+ case NoLanes:
+ // Two-register operands have been converted to the
+ // composite register classes.
+ if (Count == 2) {
+ const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+ &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+ FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+ }
+
+ Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count,
+ (Spacing == 2), S, E));
+ break;
+ case AllLanes:
+ // Two-register operands have been converted to the
+ // composite register classes.
+ if (Count == 2) {
+ const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+ &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+ FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+ }
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
+ (Spacing == 2),
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count,
+ LaneIndex,
+ (Spacing == 2),
+ S, E));
+ break;
+ }
+ return MatchOperand_Success;
+}
+
+/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ unsigned Opt;
+
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef OptStr = Tok.getString();
+
+ Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
+ .Case("sy", ARM_MB::SY)
+ .Case("st", ARM_MB::ST)
+ .Case("ld", ARM_MB::LD)
+ .Case("sh", ARM_MB::ISH)
+ .Case("ish", ARM_MB::ISH)
+ .Case("shst", ARM_MB::ISHST)
+ .Case("ishst", ARM_MB::ISHST)
+ .Case("ishld", ARM_MB::ISHLD)
+ .Case("nsh", ARM_MB::NSH)
+ .Case("un", ARM_MB::NSH)
+ .Case("nshst", ARM_MB::NSHST)
+ .Case("nshld", ARM_MB::NSHLD)
+ .Case("unst", ARM_MB::NSHST)
+ .Case("osh", ARM_MB::OSH)
+ .Case("oshst", ARM_MB::OSHST)
+ .Case("oshld", ARM_MB::OSHLD)
+ .Default(~0U);
+
+ // ishld, oshld, nshld and ld are only available from ARMv8.
+ if (!hasV8Ops() && (Opt == ARM_MB::ISHLD || Opt == ARM_MB::OSHLD ||
+ Opt == ARM_MB::NSHLD || Opt == ARM_MB::LD))
+ Opt = ~0U;
+
+ if (Opt == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ } else if (Tok.is(AsmToken::Hash) ||
+ Tok.is(AsmToken::Dollar) ||
+ Tok.is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat '#' or '$'.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ const MCExpr *MemBarrierID;
+ if (getParser().parseExpression(MemBarrierID)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(MemBarrierID);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+
+ int Val = CE->getValue();
+ if (Val & ~0xf) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Opt = ARM_MB::RESERVED_0 + Val;
+ } else
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
+ return MatchOperand_Success;
+}
+
+/// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ unsigned Opt;
+
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef OptStr = Tok.getString();
+
+ if (OptStr.equals_lower("sy"))
+ Opt = ARM_ISB::SY;
+ else
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ } else if (Tok.is(AsmToken::Hash) ||
+ Tok.is(AsmToken::Dollar) ||
+ Tok.is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat '#' or '$'.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ const MCExpr *ISBarrierID;
+ if (getParser().parseExpression(ISBarrierID)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ISBarrierID);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+
+ int Val = CE->getValue();
+ if (Val & ~0xf) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Opt = ARM_ISB::RESERVED_0 + Val;
+ } else
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(ARMOperand::CreateInstSyncBarrierOpt(
+ (ARM_ISB::InstSyncBOpt)Opt, S));
+ return MatchOperand_Success;
+}
+
+
+/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ StringRef IFlagsStr = Tok.getString();
+
+ // An iflags string of "none" is interpreted to mean that none of the AIF
+ // bits are set. Not a terribly useful instruction, but a valid encoding.
+ unsigned IFlags = 0;
+ if (IFlagsStr != "none") {
+ for (int i = 0, e = IFlagsStr.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1))
+ .Case("a", ARM_PROC::A)
+ .Case("i", ARM_PROC::I)
+ .Case("f", ARM_PROC::F)
+ .Default(~0U);
+
+ // If some specific iflag is already set, it means that some letter is
+ // present more than once, this is not acceptable.
+ if (Flag == ~0U || (IFlags & Flag))
+ return MatchOperand_NoMatch;
+
+ IFlags |= Flag;
+ }
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateProcIFlags((ARM_PROC::IFlags)IFlags, S));
+ return MatchOperand_Success;
+}
+
+/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ StringRef Mask = Tok.getString();
+
+ if (isMClass()) {
+ // See ARMv6-M 10.1.1
+ std::string Name = Mask.lower();
+ unsigned FlagsVal = StringSwitch<unsigned>(Name)
+ // Note: in the documentation:
+ // ARM deprecates using MSR APSR without a _<bits> qualifier as an alias
+ // for MSR APSR_nzcvq.
+ // but we do make it an alias here. This is so to get the "mask encoding"
+ // bits correct on MSR APSR writes.
+ //
+ // FIXME: Note the 0xc00 "mask encoding" bits version of the registers
+ // should really only be allowed when writing a special register. Note
+ // they get dropped in the MRS instruction reading a special register as
+ // the SYSm field is only 8 bits.
+ .Case("apsr", 0x800)
+ .Case("apsr_nzcvq", 0x800)
+ .Case("apsr_g", 0x400)
+ .Case("apsr_nzcvqg", 0xc00)
+ .Case("iapsr", 0x801)
+ .Case("iapsr_nzcvq", 0x801)
+ .Case("iapsr_g", 0x401)
+ .Case("iapsr_nzcvqg", 0xc01)
+ .Case("eapsr", 0x802)
+ .Case("eapsr_nzcvq", 0x802)
+ .Case("eapsr_g", 0x402)
+ .Case("eapsr_nzcvqg", 0xc02)
+ .Case("xpsr", 0x803)
+ .Case("xpsr_nzcvq", 0x803)
+ .Case("xpsr_g", 0x403)
+ .Case("xpsr_nzcvqg", 0xc03)
+ .Case("ipsr", 0x805)
+ .Case("epsr", 0x806)
+ .Case("iepsr", 0x807)
+ .Case("msp", 0x808)
+ .Case("psp", 0x809)
+ .Case("primask", 0x810)
+ .Case("basepri", 0x811)
+ .Case("basepri_max", 0x812)
+ .Case("faultmask", 0x813)
+ .Case("control", 0x814)
+ .Default(~0U);
+
+ if (FlagsVal == ~0U)
+ return MatchOperand_NoMatch;
+
+ if (!hasDSP() && (FlagsVal & 0x400))
+ // The _g and _nzcvqg versions are only valid if the DSP extension is
+ // available.
+ return MatchOperand_NoMatch;
+
+ if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813)
+ // basepri, basepri_max and faultmask only valid for V7m.
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+ return MatchOperand_Success;
+ }
+
+ // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
+ size_t Start = 0, Next = Mask.find('_');
+ StringRef Flags = "";
+ std::string SpecReg = Mask.slice(Start, Next).lower();
+ if (Next != StringRef::npos)
+ Flags = Mask.slice(Next+1, Mask.size());
+
+ // FlagsVal contains the complete mask:
+ // 3-0: Mask
+ // 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ unsigned FlagsVal = 0;
+
+ if (SpecReg == "apsr") {
+ FlagsVal = StringSwitch<unsigned>(Flags)
+ .Case("nzcvq", 0x8) // same as CPSR_f
+ .Case("g", 0x4) // same as CPSR_s
+ .Case("nzcvqg", 0xc) // same as CPSR_fs
+ .Default(~0U);
+
+ if (FlagsVal == ~0U) {
+ if (!Flags.empty())
+ return MatchOperand_NoMatch;
+ else
+ FlagsVal = 8; // No flag
+ }
+ } else if (SpecReg == "cpsr" || SpecReg == "spsr") {
+ // cpsr_all is an alias for cpsr_fc, as is plain cpsr.
+ if (Flags == "all" || Flags == "")
+ Flags = "fc";
+ for (int i = 0, e = Flags.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1))
+ .Case("c", 1)
+ .Case("x", 2)
+ .Case("s", 4)
+ .Case("f", 8)
+ .Default(~0U);
+
+ // If some specific flag is already set, it means that some letter is
+ // present more than once, this is not acceptable.
+ if (FlagsVal == ~0U || (FlagsVal & Flag))
+ return MatchOperand_NoMatch;
+ FlagsVal |= Flag;
+ }
+ } else // No match for special register.
+ return MatchOperand_NoMatch;
+
+ // Special register without flags is NOT equivalent to "fc" flags.
+ // NOTE: This is a divergence from gas' behavior. Uncommenting the following
+ // two lines would enable gas compatibility at the expense of breaking
+ // round-tripping.
+ //
+ // if (!FlagsVal)
+ // FlagsVal = 0x9;
+
+ // Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ if (SpecReg == "spsr")
+ FlagsVal |= 16;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+ return MatchOperand_Success;
+}
+
+/// parseBankedRegOperand - Try to parse a banked register (e.g. "lr_irq") for
+/// use in the MRS/MSR instructions added to support virtualization.
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ StringRef RegName = Tok.getString();
+
+ // The values here come from B9.2.3 of the ARM ARM, where bits 4-0 are SysM
+ // and bit 5 is R.
+ unsigned Encoding = StringSwitch<unsigned>(RegName.lower())
+ .Case("r8_usr", 0x00)
+ .Case("r9_usr", 0x01)
+ .Case("r10_usr", 0x02)
+ .Case("r11_usr", 0x03)
+ .Case("r12_usr", 0x04)
+ .Case("sp_usr", 0x05)
+ .Case("lr_usr", 0x06)
+ .Case("r8_fiq", 0x08)
+ .Case("r9_fiq", 0x09)
+ .Case("r10_fiq", 0x0a)
+ .Case("r11_fiq", 0x0b)
+ .Case("r12_fiq", 0x0c)
+ .Case("sp_fiq", 0x0d)
+ .Case("lr_fiq", 0x0e)
+ .Case("lr_irq", 0x10)
+ .Case("sp_irq", 0x11)
+ .Case("lr_svc", 0x12)
+ .Case("sp_svc", 0x13)
+ .Case("lr_abt", 0x14)
+ .Case("sp_abt", 0x15)
+ .Case("lr_und", 0x16)
+ .Case("sp_und", 0x17)
+ .Case("lr_mon", 0x1c)
+ .Case("sp_mon", 0x1d)
+ .Case("elr_hyp", 0x1e)
+ .Case("sp_hyp", 0x1f)
+ .Case("spsr_fiq", 0x2e)
+ .Case("spsr_irq", 0x30)
+ .Case("spsr_svc", 0x32)
+ .Case("spsr_abt", 0x34)
+ .Case("spsr_und", 0x36)
+ .Case("spsr_mon", 0x3c)
+ .Case("spsr_hyp", 0x3e)
+ .Default(~0U);
+
+ if (Encoding == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateBankedReg(Encoding, S));
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
+ int High) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), Op + " operand expected.");
+ return MatchOperand_ParseFail;
+ }
+ StringRef ShiftName = Tok.getString();
+ std::string LowerOp = Op.lower();
+ std::string UpperOp = Op.upper();
+ if (ShiftName != LowerOp && ShiftName != UpperOp) {
+ Error(Parser.getTok().getLoc(), Op + " operand expected.");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat shift type token.
+
+ // There must be a '#' and a shift amount.
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *ShiftAmount;
+ SMLoc Loc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+ int Val = CE->getValue();
+ if (Val < Low || Val > High) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreateImm(CE, Loc, EndLoc));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(S, "'be' or 'le' operand expected");
+ return MatchOperand_ParseFail;
+ }
+ int Val = StringSwitch<int>(Tok.getString().lower())
+ .Case("be", 1)
+ .Case("le", 0)
+ .Default(-1);
+ Parser.Lex(); // Eat the token.
+
+ if (Val == -1) {
+ Error(S, "'be' or 'le' operand expected");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::create(Val,
+ getContext()),
+ S, Tok.getEndLoc()));
+ return MatchOperand_Success;
+}
+
+/// parseShifterImm - Parse the shifter immediate operand for SSAT/USAT
+/// instructions. Legal values are:
+/// lsl #n 'n' in [0,31]
+/// asr #n 'n' in [1,32]
+/// n == 32 encoded as n == 0.
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseShifterImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(S, "shift operator 'asr' or 'lsl' expected");
+ return MatchOperand_ParseFail;
+ }
+ StringRef ShiftName = Tok.getString();
+ bool isASR;
+ if (ShiftName == "lsl" || ShiftName == "LSL")
+ isASR = false;
+ else if (ShiftName == "asr" || ShiftName == "ASR")
+ isASR = true;
+ else {
+ Error(S, "shift operator 'asr' or 'lsl' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the operator.
+
+ // A '#' and a shift amount.
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+ SMLoc ExLoc = Parser.getTok().getLoc();
+
+ const MCExpr *ShiftAmount;
+ SMLoc EndLoc;
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
+ Error(ExLoc, "malformed shift expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(ExLoc, "shift amount must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Val = CE->getValue();
+ if (isASR) {
+ // Shift amount must be in [1,32]
+ if (Val < 1 || Val > 32) {
+ Error(ExLoc, "'asr' shift amount must be in range [1,32]");
+ return MatchOperand_ParseFail;
+ }
+ // asr #32 encoded as asr #0, but is not allowed in Thumb2 mode.
+ if (isThumb() && Val == 32) {
+ Error(ExLoc, "'asr #32' shift amount not allowed in Thumb mode");
+ return MatchOperand_ParseFail;
+ }
+ if (Val == 32) Val = 0;
+ } else {
+ // Shift amount must be in [1,32]
+ if (Val < 0 || Val > 31) {
+ Error(ExLoc, "'lsr' shift amount must be in range [0,31]");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, EndLoc));
+
+ return MatchOperand_Success;
+}
+
+/// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family
+/// of instructions. Legal values are:
+/// ror #n 'n' in {0, 8, 16, 24}
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseRotImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ StringRef ShiftName = Tok.getString();
+ if (ShiftName != "ror" && ShiftName != "ROR")
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the operator.
+
+ // A '#' and a rotate amount.
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+ SMLoc ExLoc = Parser.getTok().getLoc();
+
+ const MCExpr *ShiftAmount;
+ SMLoc EndLoc;
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
+ Error(ExLoc, "malformed rotate expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(ExLoc, "rotate amount must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Val = CE->getValue();
+ // Shift amount must be in {0, 8, 16, 24} (0 is undocumented extension)
+ // normally, zero is represented in asm by omitting the rotate operand
+ // entirely.
+ if (Val != 8 && Val != 16 && Val != 24 && Val != 0) {
+ Error(ExLoc, "'ror' rotate amount must be 8, 16, or 24");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreateRotImm(Val, S, EndLoc));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseModImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ MCAsmLexer &Lexer = getLexer();
+ int64_t Imm1, Imm2;
+
+ SMLoc S = Parser.getTok().getLoc();
+
+ // 1) A mod_imm operand can appear in the place of a register name:
+ // add r0, #mod_imm
+ // add r0, r0, #mod_imm
+ // to correctly handle the latter, we bail out as soon as we see an
+ // identifier.
+ //
+ // 2) Similarly, we do not want to parse into complex operands:
+ // mov r0, #mod_imm
+ // mov r0, :lower16:(_foo)
+ if (Parser.getTok().is(AsmToken::Identifier) ||
+ Parser.getTok().is(AsmToken::Colon))
+ return MatchOperand_NoMatch;
+
+ // Hash (dollar) is optional as per the ARMARM
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
+ // Avoid parsing into complex operands (#:)
+ if (Lexer.peekTok().is(AsmToken::Colon))
+ return MatchOperand_NoMatch;
+
+ // Eat the hash (dollar)
+ Parser.Lex();
+ }
+
+ SMLoc Sx1, Ex1;
+ Sx1 = Parser.getTok().getLoc();
+ const MCExpr *Imm1Exp;
+ if (getParser().parseExpression(Imm1Exp, Ex1)) {
+ Error(Sx1, "malformed expression");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm1Exp);
+
+ if (CE) {
+ // Immediate must fit within 32-bits
+ Imm1 = CE->getValue();
+ int Enc = ARM_AM::getSOImmVal(Imm1);
+ if (Enc != -1 && Parser.getTok().is(AsmToken::EndOfStatement)) {
+ // We have a match!
+ Operands.push_back(ARMOperand::CreateModImm((Enc & 0xFF),
+ (Enc & 0xF00) >> 7,
+ Sx1, Ex1));
+ return MatchOperand_Success;
+ }
+
+ // We have parsed an immediate which is not for us, fallback to a plain
+ // immediate. This can happen for instruction aliases. For an example,
+ // ARMInstrInfo.td defines the alias [mov <-> mvn] which can transform
+ // a mov (mvn) with a mod_imm_neg/mod_imm_not operand into the opposite
+ // instruction with a mod_imm operand. The alias is defined such that the
+ // parser method is shared, that's why we have to do this here.
+ if (Parser.getTok().is(AsmToken::EndOfStatement)) {
+ Operands.push_back(ARMOperand::CreateImm(Imm1Exp, Sx1, Ex1));
+ return MatchOperand_Success;
+ }
+ } else {
+ // Operands like #(l1 - l2) can only be evaluated at a later stage (via an
+ // MCFixup). Fallback to a plain immediate.
+ Operands.push_back(ARMOperand::CreateImm(Imm1Exp, Sx1, Ex1));
+ return MatchOperand_Success;
+ }
+
+ // From this point onward, we expect the input to be a (#bits, #rot) pair
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Sx1, "expected modified immediate operand: #[0, 255], #even[0-30]");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Imm1 & ~0xFF) {
+ Error(Sx1, "immediate operand must a number in the range [0, 255]");
+ return MatchOperand_ParseFail;
+ }
+
+ // Eat the comma
+ Parser.Lex();
+
+ // Repeat for #rot
+ SMLoc Sx2, Ex2;
+ Sx2 = Parser.getTok().getLoc();
+
+ // Eat the optional hash (dollar)
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar))
+ Parser.Lex();
+
+ const MCExpr *Imm2Exp;
+ if (getParser().parseExpression(Imm2Exp, Ex2)) {
+ Error(Sx2, "malformed expression");
+ return MatchOperand_ParseFail;
+ }
+
+ CE = dyn_cast<MCConstantExpr>(Imm2Exp);
+
+ if (CE) {
+ Imm2 = CE->getValue();
+ if (!(Imm2 & ~0x1E)) {
+ // We have a match!
+ Operands.push_back(ARMOperand::CreateModImm(Imm1, Imm2, S, Ex2));
+ return MatchOperand_Success;
+ }
+ Error(Sx2, "immediate operand must an even number in the range [0, 30]");
+ return MatchOperand_ParseFail;
+ } else {
+ Error(Sx2, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+}
+
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseBitfield(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ // The bitfield descriptor is really two operands, the LSB and the width.
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *LSBExpr;
+ SMLoc E = Parser.getTok().getLoc();
+ if (getParser().parseExpression(LSBExpr)) {
+ Error(E, "malformed immediate expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LSBExpr);
+ if (!CE) {
+ Error(E, "'lsb' operand must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t LSB = CE->getValue();
+ // The LSB must be in the range [0,31]
+ if (LSB < 0 || LSB > 31) {
+ Error(E, "'lsb' operand must be in the range [0,31]");
+ return MatchOperand_ParseFail;
+ }
+ E = Parser.getTok().getLoc();
+
+ // Expect another immediate operand.
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "too few operands");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *WidthExpr;
+ SMLoc EndLoc;
+ if (getParser().parseExpression(WidthExpr, EndLoc)) {
+ Error(E, "malformed immediate expression");
+ return MatchOperand_ParseFail;
+ }
+ CE = dyn_cast<MCConstantExpr>(WidthExpr);
+ if (!CE) {
+ Error(E, "'width' operand must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Width = CE->getValue();
+ // The LSB must be in the range [1,32-lsb]
+ if (Width < 1 || Width > 32 - LSB) {
+ Error(E, "'width' operand must be in the range [1,32-lsb]");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, EndLoc));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
+ // Check for a post-index addressing register operand. Specifically:
+ // postidx_reg := '+' register {, shift}
+ // | '-' register {, shift}
+ // | register {, shift}
+
+ // This method must return MatchOperand_NoMatch without consuming any tokens
+ // in the case where there is no match, as other alternatives take other
+ // parse methods.
+ MCAsmParser &Parser = getParser();
+ AsmToken Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ bool haveEaten = false;
+ bool isAdd = true;
+ if (Tok.is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat the '+' token.
+ haveEaten = true;
+ } else if (Tok.is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the '-' token.
+ isAdd = false;
+ haveEaten = true;
+ }
+
+ SMLoc E = Parser.getTok().getEndLoc();
+ int Reg = tryParseRegister();
+ if (Reg == -1) {
+ if (!haveEaten)
+ return MatchOperand_NoMatch;
+ Error(Parser.getTok().getLoc(), "register expected");
+ return MatchOperand_ParseFail;
+ }
+
+ ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift;
+ unsigned ShiftImm = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the ','.
+ if (parseMemRegOffsetShift(ShiftTy, ShiftImm))
+ return MatchOperand_ParseFail;
+
+ // FIXME: Only approximates end...may include intervening whitespace.
+ E = Parser.getTok().getLoc();
+ }
+
+ Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy,
+ ShiftImm, S, E));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
+ // Check for a post-index addressing register operand. Specifically:
+ // am3offset := '+' register
+ // | '-' register
+ // | register
+ // | # imm
+ // | # + imm
+ // | # - imm
+
+ // This method must return MatchOperand_NoMatch without consuming any tokens
+ // in the case where there is no match, as other alternatives take other
+ // parse methods.
+ MCAsmParser &Parser = getParser();
+ AsmToken Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+
+ // Do immediates first, as we always parse those if we have a '#'.
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
+ Parser.Lex(); // Eat '#' or '$'.
+ // Explicitly look for a '-', as we need to encode negative zero
+ // differently.
+ bool isNegative = Parser.getTok().is(AsmToken::Minus);
+ const MCExpr *Offset;
+ SMLoc E;
+ if (getParser().parseExpression(Offset, E))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
+ if (!CE) {
+ Error(S, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+ // Negative zero is encoded as the flag value INT32_MIN.
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ Val = INT32_MIN;
+
+ Operands.push_back(
+ ARMOperand::CreateImm(MCConstantExpr::create(Val, getContext()), S, E));
+
+ return MatchOperand_Success;
+ }
+
+
+ bool haveEaten = false;
+ bool isAdd = true;
+ if (Tok.is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat the '+' token.
+ haveEaten = true;
+ } else if (Tok.is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the '-' token.
+ isAdd = false;
+ haveEaten = true;
+ }
+
+ Tok = Parser.getTok();
+ int Reg = tryParseRegister();
+ if (Reg == -1) {
+ if (!haveEaten)
+ return MatchOperand_NoMatch;
+ Error(Tok.getLoc(), "register expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift,
+ 0, S, Tok.getEndLoc()));
+
+ return MatchOperand_Success;
+}
+
+/// Convert parsed operands to MCInst. Needed here because this instruction
+/// only has two register operands, but multiplication is commutative so
+/// assemblers should accept both "mul rD, rN, rD" and "mul rD, rD, rN".
+void ARMAsmParser::cvtThumbMultiply(MCInst &Inst,
+ const OperandVector &Operands) {
+ ((ARMOperand &)*Operands[3]).addRegOperands(Inst, 1);
+ ((ARMOperand &)*Operands[1]).addCCOutOperands(Inst, 1);
+ // If we have a three-operand form, make sure to set Rn to be the operand
+ // that isn't the same as Rd.
+ unsigned RegOp = 4;
+ if (Operands.size() == 6 &&
+ ((ARMOperand &)*Operands[4]).getReg() ==
+ ((ARMOperand &)*Operands[3]).getReg())
+ RegOp = 5;
+ ((ARMOperand &)*Operands[RegOp]).addRegOperands(Inst, 1);
+ Inst.addOperand(Inst.getOperand(0));
+ ((ARMOperand &)*Operands[2]).addCondCodeOperands(Inst, 2);
+}
+
+void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
+ const OperandVector &Operands) {
+ int CondOp = -1, ImmOp = -1;
+ switch(Inst.getOpcode()) {
+ case ARM::tB:
+ case ARM::tBcc: CondOp = 1; ImmOp = 2; break;
+
+ case ARM::t2B:
+ case ARM::t2Bcc: CondOp = 1; ImmOp = 3; break;
+
+ default: llvm_unreachable("Unexpected instruction in cvtThumbBranches");
+ }
+ // first decide whether or not the branch should be conditional
+ // by looking at it's location relative to an IT block
+ if(inITBlock()) {
+ // inside an IT block we cannot have any conditional branches. any
+ // such instructions needs to be converted to unconditional form
+ switch(Inst.getOpcode()) {
+ case ARM::tBcc: Inst.setOpcode(ARM::tB); break;
+ case ARM::t2Bcc: Inst.setOpcode(ARM::t2B); break;
+ }
+ } else {
+ // outside IT blocks we can only have unconditional branches with AL
+ // condition code or conditional branches with non-AL condition code
+ unsigned Cond = static_cast<ARMOperand &>(*Operands[CondOp]).getCondCode();
+ switch(Inst.getOpcode()) {
+ case ARM::tB:
+ case ARM::tBcc:
+ Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc);
+ break;
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ Inst.setOpcode(Cond == ARMCC::AL ? ARM::t2B : ARM::t2Bcc);
+ break;
+ }
+ }
+
+ // now decide on encoding size based on branch target range
+ switch(Inst.getOpcode()) {
+ // classify tB as either t2B or t1B based on range of immediate operand
+ case ARM::tB: {
+ ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]);
+ if (!op.isSignedOffset<11, 1>() && isThumbTwo())
+ Inst.setOpcode(ARM::t2B);
+ break;
+ }
+ // classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand
+ case ARM::tBcc: {
+ ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]);
+ if (!op.isSignedOffset<8, 1>() && isThumbTwo())
+ Inst.setOpcode(ARM::t2Bcc);
+ break;
+ }
+ }
+ ((ARMOperand &)*Operands[ImmOp]).addImmOperands(Inst, 1);
+ ((ARMOperand &)*Operands[CondOp]).addCondCodeOperands(Inst, 2);
+}
+
+/// Parse an ARM memory expression, return false if successful else return true
+/// or an error. The first token must be a '[' when called.
+bool ARMAsmParser::parseMemory(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S, E;
+ assert(Parser.getTok().is(AsmToken::LBrac) &&
+ "Token is not a Left Bracket");
+ S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const AsmToken &BaseRegTok = Parser.getTok();
+ int BaseRegNum = tryParseRegister();
+ if (BaseRegNum == -1)
+ return Error(BaseRegTok.getLoc(), "register expected");
+
+ // The next token must either be a comma, a colon or a closing bracket.
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Colon) && !Tok.is(AsmToken::Comma) &&
+ !Tok.is(AsmToken::RBrac))
+ return Error(Tok.getLoc(), "malformed memory operand");
+
+ if (Tok.is(AsmToken::RBrac)) {
+ E = Tok.getEndLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0,
+ ARM_AM::no_shift, 0, 0, false,
+ S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand. It's rather odd, but syntactically valid.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
+ }
+
+ return false;
+ }
+
+ assert((Tok.is(AsmToken::Colon) || Tok.is(AsmToken::Comma)) &&
+ "Lost colon or comma in memory operand?!");
+ if (Tok.is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+ }
+
+ // If we have a ':', it's an alignment specifier.
+ if (Parser.getTok().is(AsmToken::Colon)) {
+ Parser.Lex(); // Eat the ':'.
+ E = Parser.getTok().getLoc();
+ SMLoc AlignmentLoc = Tok.getLoc();
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr))
+ return true;
+
+ // The expression has to be a constant. Memory references with relocations
+ // don't come through here, as they use the <label> forms of the relevant
+ // instructions.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE)
+ return Error (E, "constant expression expected");
+
+ unsigned Align = 0;
+ switch (CE->getValue()) {
+ default:
+ return Error(E,
+ "alignment specifier must be 16, 32, 64, 128, or 256 bits");
+ case 16: Align = 2; break;
+ case 32: Align = 4; break;
+ case 64: Align = 8; break;
+ case 128: Align = 16; break;
+ case 256: Align = 32; break;
+ }
+
+ // Now we should have the closing ']'
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ // Don't worry about range checking the value here. That's handled by
+ // the is*() predicates.
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0,
+ ARM_AM::no_shift, 0, Align,
+ false, S, E, AlignmentLoc));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
+ }
+
+ return false;
+ }
+
+ // If we have a '#', it's an immediate offset, else assume it's a register
+ // offset. Be friendly and also accept a plain integer (without a leading
+ // hash) for gas compatibility.
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar) ||
+ Parser.getTok().is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat '#' or '$'.
+ E = Parser.getTok().getLoc();
+
+ bool isNegative = getParser().getTok().is(AsmToken::Minus);
+ const MCExpr *Offset;
+ if (getParser().parseExpression(Offset))
+ return true;
+
+ // The expression has to be a constant. Memory references with relocations
+ // don't come through here, as they use the <label> forms of the relevant
+ // instructions.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
+ if (!CE)
+ return Error (E, "constant expression expected");
+
+ // If the constant was #-0, represent it as INT32_MIN.
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ CE = MCConstantExpr::create(INT32_MIN, getContext());
+
+ // Now we should have the closing ']'
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ // Don't worry about range checking the value here. That's handled by
+ // the is*() predicates.
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, CE, 0,
+ ARM_AM::no_shift, 0, 0,
+ false, S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
+ }
+
+ return false;
+ }
+
+ // The register offset is optionally preceded by a '+' or '-'
+ bool isNegative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ isNegative = true;
+ Parser.Lex(); // Eat the '-'.
+ } else if (Parser.getTok().is(AsmToken::Plus)) {
+ // Nothing to do.
+ Parser.Lex(); // Eat the '+'.
+ }
+
+ E = Parser.getTok().getLoc();
+ int OffsetRegNum = tryParseRegister();
+ if (OffsetRegNum == -1)
+ return Error(E, "register expected");
+
+ // If there's a shift operator, handle it.
+ ARM_AM::ShiftOpc ShiftType = ARM_AM::no_shift;
+ unsigned ShiftImm = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the ','.
+ if (parseMemRegOffsetShift(ShiftType, ShiftImm))
+ return true;
+ }
+
+ // Now we should have the closing ']'
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, OffsetRegNum,
+ ShiftType, ShiftImm, 0, isNegative,
+ S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
+ }
+
+ return false;
+}
+
+/// parseMemRegOffsetShift - one of these two:
+/// ( lsl | lsr | asr | ror ) , # shift_amount
+/// rrx
+/// return true if it parses a shift otherwise it returns false.
+bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
+ unsigned &Amount) {
+ MCAsmParser &Parser = getParser();
+ SMLoc Loc = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return true;
+ StringRef ShiftName = Tok.getString();
+ if (ShiftName == "lsl" || ShiftName == "LSL" ||
+ ShiftName == "asl" || ShiftName == "ASL")
+ St = ARM_AM::lsl;
+ else if (ShiftName == "lsr" || ShiftName == "LSR")
+ St = ARM_AM::lsr;
+ else if (ShiftName == "asr" || ShiftName == "ASR")
+ St = ARM_AM::asr;
+ else if (ShiftName == "ror" || ShiftName == "ROR")
+ St = ARM_AM::ror;
+ else if (ShiftName == "rrx" || ShiftName == "RRX")
+ St = ARM_AM::rrx;
+ else
+ return Error(Loc, "illegal shift operator");
+ Parser.Lex(); // Eat shift type token.
+
+ // rrx stands alone.
+ Amount = 0;
+ if (St != ARM_AM::rrx) {
+ Loc = Parser.getTok().getLoc();
+ // A '#' and a shift amount.
+ const AsmToken &HashTok = Parser.getTok();
+ if (HashTok.isNot(AsmToken::Hash) &&
+ HashTok.isNot(AsmToken::Dollar))
+ return Error(HashTok.getLoc(), "'#' expected");
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr))
+ return true;
+ // Range check the immediate.
+ // lsl, ror: 0 <= imm <= 31
+ // lsr, asr: 0 <= imm <= 32
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE)
+ return Error(Loc, "shift amount must be an immediate");
+ int64_t Imm = CE->getValue();
+ if (Imm < 0 ||
+ ((St == ARM_AM::lsl || St == ARM_AM::ror) && Imm > 31) ||
+ ((St == ARM_AM::lsr || St == ARM_AM::asr) && Imm > 32))
+ return Error(Loc, "immediate shift value out of range");
+ // If <ShiftTy> #0, turn it into a no_shift.
+ if (Imm == 0)
+ St = ARM_AM::lsl;
+ // For consistency, treat lsr #32 and asr #32 as having immediate value 0.
+ if (Imm == 32)
+ Imm = 0;
+ Amount = Imm;
+ }
+
+ return false;
+}
+
+/// parseFPImm - A floating point immediate expression operand.
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseFPImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ // Anything that can accept a floating point constant as an operand
+ // needs to go through here, as the regular parseExpression is
+ // integer only.
+ //
+ // This routine still creates a generic Immediate operand, containing
+ // a bitcast of the 64-bit floating point value. The various operands
+ // that accept floats can check whether the value is valid for them
+ // via the standard is*() predicates.
+
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ // Disambiguate the VMOV forms that can accept an FP immediate.
+ // vmov.f32 <sreg>, #imm
+ // vmov.f64 <dreg>, #imm
+ // vmov.f32 <dreg>, #imm @ vector f32x2
+ // vmov.f32 <qreg>, #imm @ vector f32x4
+ //
+ // There are also the NEON VMOV instructions which expect an
+ // integer constant. Make sure we don't try to parse an FPImm
+ // for these:
+ // vmov.i{8|16|32|64} <dreg|qreg>, #imm
+ ARMOperand &TyOp = static_cast<ARMOperand &>(*Operands[2]);
+ bool isVmovf = TyOp.isToken() &&
+ (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64");
+ ARMOperand &Mnemonic = static_cast<ARMOperand &>(*Operands[0]);
+ bool isFconst = Mnemonic.isToken() && (Mnemonic.getToken() == "fconstd" ||
+ Mnemonic.getToken() == "fconsts");
+ if (!(isVmovf || isFconst))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat '#' or '$'.
+
+ // Handle negation, as that still comes through as a separate token.
+ bool isNegative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ isNegative = true;
+ Parser.Lex();
+ }
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc Loc = Tok.getLoc();
+ if (Tok.is(AsmToken::Real) && isVmovf) {
+ APFloat RealVal(APFloat::IEEEsingle, Tok.getString());
+ uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+ // If we had a '-' in front, toggle the sign bit.
+ IntVal ^= (uint64_t)isNegative << 31;
+ Parser.Lex(); // Eat the token.
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::create(IntVal, getContext()),
+ S, Parser.getTok().getLoc()));
+ return MatchOperand_Success;
+ }
+ // Also handle plain integers. Instructions which allow floating point
+ // immediates also allow a raw encoded 8-bit value.
+ if (Tok.is(AsmToken::Integer) && isFconst) {
+ int64_t Val = Tok.getIntVal();
+ Parser.Lex(); // Eat the token.
+ if (Val > 255 || Val < 0) {
+ Error(Loc, "encoded floating point value out of range");
+ return MatchOperand_ParseFail;
+ }
+ float RealVal = ARM_AM::getFPImmFloat(Val);
+ Val = APFloat(RealVal).bitcastToAPInt().getZExtValue();
+
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::create(Val, getContext()), S,
+ Parser.getTok().getLoc()));
+ return MatchOperand_Success;
+ }
+
+ Error(Loc, "invalid floating point immediate");
+ return MatchOperand_ParseFail;
+}
+
+/// Parse a arm instruction operand. For now this parses the operand regardless
+/// of the mnemonic.
+bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S, E;
+
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy == MatchOperand_Success)
+ return false;
+ // If there wasn't a custom match, try the generic matcher below. Otherwise,
+ // there was a match, but an error occurred, in which case, just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail)
+ return true;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return true;
+ case AsmToken::Identifier: {
+ // If we've seen a branch mnemonic, the next operand must be a label. This
+ // is true even if the label is a register name. So "br r1" means branch to
+ // label "r1".
+ bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl";
+ if (!ExpectLabel) {
+ if (!tryParseRegisterWithWriteBack(Operands))
+ return false;
+ int Res = tryParseShiftRegister(Operands);
+ if (Res == 0) // success
+ return false;
+ else if (Res == -1) // irrecoverable error
+ return true;
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
+ return false;
+ }
+ }
+
+ // Fall though for the Identifier case that is not a register or a
+ // special name.
+ }
+ case AsmToken::LParen: // parenthesized expressions like (_strcmp-4)
+ case AsmToken::Integer: // things like 1f and 2b as a branch targets
+ case AsmToken::String: // quoted label names.
+ case AsmToken::Dot: { // . as a branch target
+ // This was not a register so parse other operands that start with an
+ // identifier (like labels) as expressions and create them as immediates.
+ const MCExpr *IdVal;
+ S = Parser.getTok().getLoc();
+ if (getParser().parseExpression(IdVal))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(IdVal, S, E));
+ return false;
+ }
+ case AsmToken::LBrac:
+ return parseMemory(Operands);
+ case AsmToken::LCurly:
+ return parseRegisterList(Operands);
+ case AsmToken::Dollar:
+ case AsmToken::Hash: {
+ // #42 -> immediate.
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Colon)) {
+ bool isNegative = Parser.getTok().is(AsmToken::Minus);
+ const MCExpr *ImmVal;
+ if (getParser().parseExpression(ImmVal))
+ return true;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (CE) {
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ ImmVal = MCConstantExpr::create(INT32_MIN, getContext());
+ }
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
+
+ // There can be a trailing '!' on operands that we want as a separate
+ // '!' Token operand. Handle that here. For example, the compatibility
+ // alias for 'srsdb sp!, #imm' is 'srsdb #imm!'.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(Parser.getTok().getString(),
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat exclaim token
+ }
+ return false;
+ }
+ // w/ a ':' after the '#', it's just like a plain ':'.
+ // FALLTHROUGH
+ }
+ case AsmToken::Colon: {
+ S = Parser.getTok().getLoc();
+ // ":lower16:" and ":upper16:" expression prefixes
+ // FIXME: Check it's an expression prefix,
+ // e.g. (FOO - :lower16:BAR) isn't legal.
+ ARMMCExpr::VariantKind RefKind;
+ if (parsePrefix(RefKind))
+ return true;
+
+ const MCExpr *SubExprVal;
+ if (getParser().parseExpression(SubExprVal))
+ return true;
+
+ const MCExpr *ExprVal = ARMMCExpr::create(RefKind, SubExprVal,
+ getContext());
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
+ return false;
+ }
+ case AsmToken::Equal: {
+ S = Parser.getTok().getLoc();
+ if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val)
+ return Error(S, "unexpected token in operand");
+
+ Parser.Lex(); // Eat '='
+ const MCExpr *SubExprVal;
+ if (getParser().parseExpression(SubExprVal))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ const MCExpr *CPLoc =
+ getTargetStreamer().addConstantPoolEntry(SubExprVal, S);
+ Operands.push_back(ARMOperand::CreateImm(CPLoc, S, E));
+ return false;
+ }
+ }
+}
+
+// parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
+// :lower16: and :upper16:.
+bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
+ MCAsmParser &Parser = getParser();
+ RefKind = ARMMCExpr::VK_ARM_None;
+
+ // consume an optional '#' (GNU compatibility)
+ if (getLexer().is(AsmToken::Hash))
+ Parser.Lex();
+
+ // :lower16: and :upper16: modifiers
+ assert(getLexer().is(AsmToken::Colon) && "expected a :");
+ Parser.Lex(); // Eat ':'
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "expected prefix identifier in operand");
+ return true;
+ }
+
+ enum {
+ COFF = (1 << MCObjectFileInfo::IsCOFF),
+ ELF = (1 << MCObjectFileInfo::IsELF),
+ MACHO = (1 << MCObjectFileInfo::IsMachO)
+ };
+ static const struct PrefixEntry {
+ const char *Spelling;
+ ARMMCExpr::VariantKind VariantKind;
+ uint8_t SupportedFormats;
+ } PrefixEntries[] = {
+ { "lower16", ARMMCExpr::VK_ARM_LO16, COFF | ELF | MACHO },
+ { "upper16", ARMMCExpr::VK_ARM_HI16, COFF | ELF | MACHO },
+ };
+
+ StringRef IDVal = Parser.getTok().getIdentifier();
+
+ const auto &Prefix =
+ std::find_if(std::begin(PrefixEntries), std::end(PrefixEntries),
+ [&IDVal](const PrefixEntry &PE) {
+ return PE.Spelling == IDVal;
+ });
+ if (Prefix == std::end(PrefixEntries)) {
+ Error(Parser.getTok().getLoc(), "unexpected prefix in operand");
+ return true;
+ }
+
+ uint8_t CurrentFormat;
+ switch (getContext().getObjectFileInfo()->getObjectFileType()) {
+ case MCObjectFileInfo::IsMachO:
+ CurrentFormat = MACHO;
+ break;
+ case MCObjectFileInfo::IsELF:
+ CurrentFormat = ELF;
+ break;
+ case MCObjectFileInfo::IsCOFF:
+ CurrentFormat = COFF;
+ break;
+ }
+
+ if (~Prefix->SupportedFormats & CurrentFormat) {
+ Error(Parser.getTok().getLoc(),
+ "cannot represent relocation in the current file format");
+ return true;
+ }
+
+ RefKind = Prefix->VariantKind;
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(), "unexpected token after prefix");
+ return true;
+ }
+ Parser.Lex(); // Eat the last ':'
+
+ return false;
+}
+
+/// \brief Given a mnemonic, split out possible predication code and carry
+/// setting letters to form a canonical mnemonic and flags.
+//
+// FIXME: Would be nice to autogen this.
+// FIXME: This is a bit of a maze of special cases.
+StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
+ unsigned &PredicationCode,
+ bool &CarrySetting,
+ unsigned &ProcessorIMod,
+ StringRef &ITMask) {
+ PredicationCode = ARMCC::AL;
+ CarrySetting = false;
+ ProcessorIMod = 0;
+
+ // Ignore some mnemonics we know aren't predicated forms.
+ //
+ // FIXME: Would be nice to autogen this.
+ if ((Mnemonic == "movs" && isThumb()) ||
+ Mnemonic == "teq" || Mnemonic == "vceq" || Mnemonic == "svc" ||
+ Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
+ Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" ||
+ Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
+ Mnemonic == "vaclt" || Mnemonic == "vacle" || Mnemonic == "hlt" ||
+ Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
+ Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
+ Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
+ Mnemonic == "fmuls" || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" ||
+ Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" ||
+ Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" ||
+ Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
+ Mnemonic.startswith("vsel"))
+ return Mnemonic;
+
+ // First, split out any predication code. Ignore mnemonics we know aren't
+ // predicated but do have a carry-set and so weren't caught above.
+ if (Mnemonic != "adcs" && Mnemonic != "bics" && Mnemonic != "movs" &&
+ Mnemonic != "muls" && Mnemonic != "smlals" && Mnemonic != "smulls" &&
+ Mnemonic != "umlals" && Mnemonic != "umulls" && Mnemonic != "lsls" &&
+ Mnemonic != "sbcs" && Mnemonic != "rscs") {
+ unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("cs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("cc", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
+ PredicationCode = CC;
+ }
+ }
+
+ // Next, determine if we have a carry setting bit. We explicitly ignore all
+ // the instructions we know end in 's'.
+ if (Mnemonic.endswith("s") &&
+ !(Mnemonic == "cps" || Mnemonic == "mls" ||
+ Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
+ Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
+ Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
+ Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
+ Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
+ Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" ||
+ Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" ||
+ Mnemonic == "vfms" || Mnemonic == "vfnms" || Mnemonic == "fconsts" ||
+ (Mnemonic == "movs" && isThumb()))) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
+ CarrySetting = true;
+ }
+
+ // The "cps" instruction can have a interrupt mode operand which is glued into
+ // the mnemonic. Check if this is the case, split it and parse the imod op
+ if (Mnemonic.startswith("cps")) {
+ // Split out any imod code.
+ unsigned IMod =
+ StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2, 2))
+ .Case("ie", ARM_PROC::IE)
+ .Case("id", ARM_PROC::ID)
+ .Default(~0U);
+ if (IMod != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size()-2);
+ ProcessorIMod = IMod;
+ }
+ }
+
+ // The "it" instruction has the condition mask on the end of the mnemonic.
+ if (Mnemonic.startswith("it")) {
+ ITMask = Mnemonic.slice(2, Mnemonic.size());
+ Mnemonic = Mnemonic.slice(0, 2);
+ }
+
+ return Mnemonic;
+}
+
+/// \brief Given a canonical mnemonic, determine if the instruction ever allows
+/// inclusion of carry set or predication code operands.
+//
+// FIXME: It would be nice to autogen this.
+void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+ bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode) {
+ CanAcceptCarrySet =
+ Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+ Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
+ Mnemonic == "add" || Mnemonic == "adc" || Mnemonic == "mul" ||
+ Mnemonic == "bic" || Mnemonic == "asr" || Mnemonic == "orr" ||
+ Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" ||
+ Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "eor" ||
+ Mnemonic == "neg" || Mnemonic == "vfm" || Mnemonic == "vfnm" ||
+ (!isThumb() &&
+ (Mnemonic == "smull" || Mnemonic == "mov" || Mnemonic == "mla" ||
+ Mnemonic == "smlal" || Mnemonic == "umlal" || Mnemonic == "umull"));
+
+ if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" ||
+ Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
+ Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" ||
+ Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") ||
+ Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" ||
+ Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" ||
+ Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" ||
+ Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" ||
+ Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" ||
+ Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
+ (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) {
+ // These mnemonics are never predicable
+ CanAcceptPredicationCode = false;
+ } else if (!isThumb()) {
+ // Some instructions are only predicable in Thumb mode
+ CanAcceptPredicationCode =
+ Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" &&
+ Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" &&
+ Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" &&
+ Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" &&
+ Mnemonic != "ldc2" && Mnemonic != "ldc2l" && Mnemonic != "stc2" &&
+ Mnemonic != "stc2l" && !Mnemonic.startswith("rfe") &&
+ !Mnemonic.startswith("srs");
+ } else if (isThumbOne()) {
+ if (hasV6MOps())
+ CanAcceptPredicationCode = Mnemonic != "movs";
+ else
+ CanAcceptPredicationCode = Mnemonic != "nop" && Mnemonic != "movs";
+ } else
+ CanAcceptPredicationCode = true;
+}
+
+// \brief Some Thumb instructions have two operand forms that are not
+// available as three operand, convert to two operand form if possible.
+//
+// FIXME: We would really like to be able to tablegen'erate this.
+void ARMAsmParser::tryConvertingToTwoOperandForm(StringRef Mnemonic,
+ bool CarrySetting,
+ OperandVector &Operands) {
+ if (Operands.size() != 6)
+ return;
+
+ const auto &Op3 = static_cast<ARMOperand &>(*Operands[3]);
+ auto &Op4 = static_cast<ARMOperand &>(*Operands[4]);
+ if (!Op3.isReg() || !Op4.isReg())
+ return;
+
+ auto Op3Reg = Op3.getReg();
+ auto Op4Reg = Op4.getReg();
+
+ // For most Thumb2 cases we just generate the 3 operand form and reduce
+ // it in processInstruction(), but the 3 operand form of ADD (t2ADDrr)
+ // won't accept SP or PC so we do the transformation here taking care
+ // with immediate range in the 'add sp, sp #imm' case.
+ auto &Op5 = static_cast<ARMOperand &>(*Operands[5]);
+ if (isThumbTwo()) {
+ if (Mnemonic != "add")
+ return;
+ bool TryTransform = Op3Reg == ARM::PC || Op4Reg == ARM::PC ||
+ (Op5.isReg() && Op5.getReg() == ARM::PC);
+ if (!TryTransform) {
+ TryTransform = (Op3Reg == ARM::SP || Op4Reg == ARM::SP ||
+ (Op5.isReg() && Op5.getReg() == ARM::SP)) &&
+ !(Op3Reg == ARM::SP && Op4Reg == ARM::SP &&
+ Op5.isImm() && !Op5.isImm0_508s4());
+ }
+ if (!TryTransform)
+ return;
+ } else if (!isThumbOne())
+ return;
+
+ if (!(Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" ||
+ Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+ Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" ||
+ Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic"))
+ return;
+
+ // If first 2 operands of a 3 operand instruction are the same
+ // then transform to 2 operand version of the same instruction
+ // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1'
+ bool Transform = Op3Reg == Op4Reg;
+
+ // For communtative operations, we might be able to transform if we swap
+ // Op4 and Op5. The 'ADD Rdm, SP, Rdm' form is already handled specially
+ // as tADDrsp.
+ const ARMOperand *LastOp = &Op5;
+ bool Swap = false;
+ if (!Transform && Op5.isReg() && Op3Reg == Op5.getReg() &&
+ ((Mnemonic == "add" && Op4Reg != ARM::SP) ||
+ Mnemonic == "and" || Mnemonic == "eor" ||
+ Mnemonic == "adc" || Mnemonic == "orr")) {
+ Swap = true;
+ LastOp = &Op4;
+ Transform = true;
+ }
+
+ // If both registers are the same then remove one of them from
+ // the operand list, with certain exceptions.
+ if (Transform) {
+ // Don't transform 'adds Rd, Rd, Rm' or 'sub{s} Rd, Rd, Rm' because the
+ // 2 operand forms don't exist.
+ if (((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub") &&
+ LastOp->isReg())
+ Transform = false;
+
+ // Don't transform 'add/sub{s} Rd, Rd, #imm' if the immediate fits into
+ // 3-bits because the ARMARM says not to.
+ if ((Mnemonic == "add" || Mnemonic == "sub") && LastOp->isImm0_7())
+ Transform = false;
+ }
+
+ if (Transform) {
+ if (Swap)
+ std::swap(Op4, Op5);
+ Operands.erase(Operands.begin() + 3);
+ }
+}
+
+bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
+ OperandVector &Operands) {
+ // FIXME: This is all horribly hacky. We really need a better way to deal
+ // with optional operands like this in the matcher table.
+
+ // The 'mov' mnemonic is special. One variant has a cc_out operand, while
+ // another does not. Specifically, the MOVW instruction does not. So we
+ // special case it here and remove the defaulted (non-setting) cc_out
+ // operand if that's the instruction we're trying to match.
+ //
+ // We do this as post-processing of the explicit operands rather than just
+ // conditionally adding the cc_out in the first place because we need
+ // to check the type of the parsed immediate operand.
+ if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() &&
+ !static_cast<ARMOperand &>(*Operands[4]).isModImm() &&
+ static_cast<ARMOperand &>(*Operands[4]).isImm0_65535Expr() &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0)
+ return true;
+
+ // Register-register 'add' for thumb does not have a cc_out operand
+ // when there are only two register operands.
+ if (isThumb() && Mnemonic == "add" && Operands.size() == 5 &&
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0)
+ return true;
+ // Register-register 'add' for thumb does not have a cc_out operand
+ // when it's an ADD Rdm, SP, {Rdm|#imm0_255} instruction. We do
+ // have to check the immediate range here since Thumb2 has a variant
+ // that can handle a different range and has a cc_out operand.
+ if (((isThumb() && Mnemonic == "add") ||
+ (isThumbTwo() && Mnemonic == "sub")) &&
+ Operands.size() == 6 && static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).getReg() == ARM::SP &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ ((Mnemonic == "add" && static_cast<ARMOperand &>(*Operands[5]).isReg()) ||
+ static_cast<ARMOperand &>(*Operands[5]).isImm0_1020s4()))
+ return true;
+ // For Thumb2, add/sub immediate does not have a cc_out operand for the
+ // imm0_4095 variant. That's the least-preferred variant when
+ // selecting via the generic "add" mnemonic, so to know that we
+ // should remove the cc_out operand, we have to explicitly check that
+ // it's not one of the other variants. Ugh.
+ if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") &&
+ Operands.size() == 6 && static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[5]).isImm()) {
+ // Nest conditions rather than one big 'if' statement for readability.
+ //
+ // If both registers are low, we're in an IT block, and the immediate is
+ // in range, we should use encoding T1 instead, which has a cc_out.
+ if (inITBlock() &&
+ isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) &&
+ static_cast<ARMOperand &>(*Operands[5]).isImm0_7())
+ return false;
+ // Check against T3. If the second register is the PC, this is an
+ // alternate form of ADR, which uses encoding T4, so check for that too.
+ if (static_cast<ARMOperand &>(*Operands[4]).getReg() != ARM::PC &&
+ static_cast<ARMOperand &>(*Operands[5]).isT2SOImm())
+ return false;
+
+ // Otherwise, we use encoding T4, which does not have a cc_out
+ // operand.
+ return true;
+ }
+
+ // The thumb2 multiply instruction doesn't have a CCOut register, so
+ // if we have a "mul" mnemonic in Thumb mode, check if we'll be able to
+ // use the 16-bit encoding or not.
+ if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 6 &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[5]).isReg() &&
+ // If the registers aren't low regs, the destination reg isn't the
+ // same as one of the source regs, or the cc_out operand is zero
+ // outside of an IT block, we have to use the 32-bit encoding, so
+ // remove the cc_out operand.
+ (!isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand &>(*Operands[5]).getReg()) ||
+ !inITBlock() || (static_cast<ARMOperand &>(*Operands[3]).getReg() !=
+ static_cast<ARMOperand &>(*Operands[5]).getReg() &&
+ static_cast<ARMOperand &>(*Operands[3]).getReg() !=
+ static_cast<ARMOperand &>(*Operands[4]).getReg())))
+ return true;
+
+ // Also check the 'mul' syntax variant that doesn't specify an explicit
+ // destination register.
+ if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ // If the registers aren't low regs or the cc_out operand is zero
+ // outside of an IT block, we have to use the 32-bit encoding, so
+ // remove the cc_out operand.
+ (!isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) ||
+ !inITBlock()))
+ return true;
+
+
+
+ // Register-register 'add/sub' for thumb does not have a cc_out operand
+ // when it's an ADD/SUB SP, #imm. Be lenient on count since there's also
+ // the "add/sub SP, SP, #imm" version. If the follow-up operands aren't
+ // right, this will result in better diagnostics (which operand is off)
+ // anyway.
+ if (isThumb() && (Mnemonic == "add" || Mnemonic == "sub") &&
+ (Operands.size() == 5 || Operands.size() == 6) &&
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[3]).getReg() == ARM::SP &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ (static_cast<ARMOperand &>(*Operands[4]).isImm() ||
+ (Operands.size() == 6 &&
+ static_cast<ARMOperand &>(*Operands[5]).isImm())))
+ return true;
+
+ return false;
+}
+
+bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
+ OperandVector &Operands) {
+ // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
+ unsigned RegIdx = 3;
+ if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
+ (static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32" ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f16")) {
+ if (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ (static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32" ||
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f16"))
+ RegIdx = 4;
+
+ if (static_cast<ARMOperand &>(*Operands[RegIdx]).isReg() &&
+ (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(
+ static_cast<ARMOperand &>(*Operands[RegIdx]).getReg()) ||
+ ARMMCRegisterClasses[ARM::QPRRegClassID].contains(
+ static_cast<ARMOperand &>(*Operands[RegIdx]).getReg())))
+ return true;
+ }
+ return false;
+}
+
+static bool isDataTypeToken(StringRef Tok) {
+ return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
+ Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
+ Tok == ".u8" || Tok == ".u16" || Tok == ".u32" || Tok == ".u64" ||
+ Tok == ".s8" || Tok == ".s16" || Tok == ".s32" || Tok == ".s64" ||
+ Tok == ".p8" || Tok == ".p16" || Tok == ".f32" || Tok == ".f64" ||
+ Tok == ".f" || Tok == ".d";
+}
+
+// FIXME: This bit should probably be handled via an explicit match class
+// in the .td files that matches the suffix instead of having it be
+// a literal string token the way it is now.
+static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
+ return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
+}
+static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features,
+ unsigned VariantID);
+
+static bool RequiresVFPRegListValidation(StringRef Inst,
+ bool &AcceptSinglePrecisionOnly,
+ bool &AcceptDoublePrecisionOnly) {
+ if (Inst.size() < 7)
+ return false;
+
+ if (Inst.startswith("fldm") || Inst.startswith("fstm")) {
+ StringRef AddressingMode = Inst.substr(4, 2);
+ if (AddressingMode == "ia" || AddressingMode == "db" ||
+ AddressingMode == "ea" || AddressingMode == "fd") {
+ AcceptSinglePrecisionOnly = Inst[6] == 's';
+ AcceptDoublePrecisionOnly = Inst[6] == 'd' || Inst[6] == 'x';
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// Parse an arm instruction mnemonic followed by its operands.
+bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ // FIXME: Can this be done via tablegen in some fashion?
+ bool RequireVFPRegisterListCheck;
+ bool AcceptSinglePrecisionOnly;
+ bool AcceptDoublePrecisionOnly;
+ RequireVFPRegisterListCheck =
+ RequiresVFPRegListValidation(Name, AcceptSinglePrecisionOnly,
+ AcceptDoublePrecisionOnly);
+
+ // Apply mnemonic aliases before doing anything else, as the destination
+ // mnemonic may include suffices and we want to handle them normally.
+ // The generic tblgen'erated code does this later, at the start of
+ // MatchInstructionImpl(), but that's too late for aliases that include
+ // any sort of suffix.
+ uint64_t AvailableFeatures = getAvailableFeatures();
+ unsigned AssemblerDialect = getParser().getAssemblerDialect();
+ applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect);
+
+ // First check for the ARM-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+ // statement and don't need to match the 'instruction."
+ return true;
+ }
+
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Mnemonic = Name.slice(Start, Next);
+
+ // Split out the predication code and carry setting flag from the mnemonic.
+ unsigned PredicationCode;
+ unsigned ProcessorIMod;
+ bool CarrySetting;
+ StringRef ITMask;
+ Mnemonic = splitMnemonic(Mnemonic, PredicationCode, CarrySetting,
+ ProcessorIMod, ITMask);
+
+ // In Thumb1, only the branch (B) instruction can be predicated.
+ if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
+ Parser.eatToEndOfStatement();
+ return Error(NameLoc, "conditional execution not supported in Thumb1");
+ }
+
+ Operands.push_back(ARMOperand::CreateToken(Mnemonic, NameLoc));
+
+ // Handle the IT instruction ITMask. Convert it to a bitmask. This
+ // is the mask as it will be for the IT encoding if the conditional
+ // encoding has a '1' as it's bit0 (i.e. 't' ==> '1'). In the case
+ // where the conditional bit0 is zero, the instruction post-processing
+ // will adjust the mask accordingly.
+ if (Mnemonic == "it") {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
+ if (ITMask.size() > 3) {
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "too many conditions on IT instruction");
+ }
+ unsigned Mask = 8;
+ for (unsigned i = ITMask.size(); i != 0; --i) {
+ char pos = ITMask[i - 1];
+ if (pos != 't' && pos != 'e') {
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
+ }
+ Mask >>= 1;
+ if (ITMask[i - 1] == 't')
+ Mask |= 8;
+ }
+ Operands.push_back(ARMOperand::CreateITMask(Mask, Loc));
+ }
+
+ // FIXME: This is all a pretty gross hack. We should automatically handle
+ // optional operands like this via tblgen.
+
+ // Next, add the CCOut and ConditionCode operands, if needed.
+ //
+ // For mnemonics which can ever incorporate a carry setting bit or predication
+ // code, our matching model involves us always generating CCOut and
+ // ConditionCode operands to match the mnemonic "as written" and then we let
+ // the matcher deal with finding the right instruction or generating an
+ // appropriate error.
+ bool CanAcceptCarrySet, CanAcceptPredicationCode;
+ getMnemonicAcceptInfo(Mnemonic, Name, CanAcceptCarrySet, CanAcceptPredicationCode);
+
+ // If we had a carry-set on an instruction that can't do that, issue an
+ // error.
+ if (!CanAcceptCarrySet && CarrySetting) {
+ Parser.eatToEndOfStatement();
+ return Error(NameLoc, "instruction '" + Mnemonic +
+ "' can not set flags, but 's' suffix specified");
+ }
+ // If we had a predication code on an instruction that can't do that, issue an
+ // error.
+ if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) {
+ Parser.eatToEndOfStatement();
+ return Error(NameLoc, "instruction '" + Mnemonic +
+ "' is not predicable, but condition code specified");
+ }
+
+ // Add the carry setting operand, if necessary.
+ if (CanAcceptCarrySet) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size());
+ Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0,
+ Loc));
+ }
+
+ // Add the predication code operand, if necessary.
+ if (CanAcceptPredicationCode) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() +
+ CarrySetting);
+ Operands.push_back(ARMOperand::CreateCondCode(
+ ARMCC::CondCodes(PredicationCode), Loc));
+ }
+
+ // Add the processor imod operand, if necessary.
+ if (ProcessorIMod) {
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::create(ProcessorIMod, getContext()),
+ NameLoc, NameLoc));
+ } else if (Mnemonic == "cps" && isMClass()) {
+ return Error(NameLoc, "instruction 'cps' requires effect for M-class");
+ }
+
+ // Add the remaining tokens in the mnemonic.
+ while (Next != StringRef::npos) {
+ Start = Next;
+ Next = Name.find('.', Start + 1);
+ StringRef ExtraToken = Name.slice(Start, Next);
+
+ // Some NEON instructions have an optional datatype suffix that is
+ // completely ignored. Check for that.
+ if (isDataTypeToken(ExtraToken) &&
+ doesIgnoreDataTypeSuffix(Mnemonic, ExtraToken))
+ continue;
+
+ // For for ARM mode generate an error if the .n qualifier is used.
+ if (ExtraToken == ".n" && !isThumb()) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "instruction with .n (narrow) qualifier not allowed in "
+ "arm mode");
+ }
+
+ // The .n qualifier is always discarded as that is what the tables
+ // and matcher expect. In ARM mode the .w qualifier has no effect,
+ // so discard it to avoid errors that can be caused by the matcher.
+ if (ExtraToken != ".n" && (isThumb() || ExtraToken != ".w")) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
+ Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc));
+ }
+ }
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (parseOperand(Operands, Mnemonic)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (parseOperand(Operands, Mnemonic)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ if (RequireVFPRegisterListCheck) {
+ ARMOperand &Op = static_cast<ARMOperand &>(*Operands.back());
+ if (AcceptSinglePrecisionOnly && !Op.isSPRRegList())
+ return Error(Op.getStartLoc(),
+ "VFP/Neon single precision register expected");
+ if (AcceptDoublePrecisionOnly && !Op.isDPRRegList())
+ return Error(Op.getStartLoc(),
+ "VFP/Neon double precision register expected");
+ }
+
+ tryConvertingToTwoOperandForm(Mnemonic, CarrySetting, Operands);
+
+ // Some instructions, mostly Thumb, have forms for the same mnemonic that
+ // do and don't have a cc_out optional-def operand. With some spot-checks
+ // of the operand list, we can figure out which variant we're trying to
+ // parse and adjust accordingly before actually matching. We shouldn't ever
+ // try to remove a cc_out operand that was explicitly set on the
+ // mnemonic, of course (CarrySetting == true). Reason number #317 the
+ // table driven matcher doesn't fit well with the ARM instruction set.
+ if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands))
+ Operands.erase(Operands.begin() + 1);
+
+ // Some instructions have the same mnemonic, but don't always
+ // have a predicate. Distinguish them here and delete the
+ // predicate if needed.
+ if (shouldOmitPredicateOperand(Mnemonic, Operands))
+ Operands.erase(Operands.begin() + 1);
+
+ // ARM mode 'blx' need special handling, as the register operand version
+ // is predicable, but the label operand version is not. So, we can't rely
+ // on the Mnemonic based checking to correctly figure out when to put
+ // a k_CondCode operand in the list. If we're trying to match the label
+ // version, remove the k_CondCode operand here.
+ if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
+ static_cast<ARMOperand &>(*Operands[2]).isImm())
+ Operands.erase(Operands.begin() + 1);
+
+ // Adjust operands of ldrexd/strexd to MCK_GPRPair.
+ // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+ // a single GPRPair reg operand is used in the .td file to replace the two
+ // GPRs. However, when parsing from asm, the two GRPs cannot be automatically
+ // expressed as a GPRPair, so we have to manually merge them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (!isThumb() && Operands.size() > 4 &&
+ (Mnemonic == "ldrexd" || Mnemonic == "strexd" || Mnemonic == "ldaexd" ||
+ Mnemonic == "stlexd")) {
+ bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd");
+ unsigned Idx = isLoad ? 2 : 3;
+ ARMOperand &Op1 = static_cast<ARMOperand &>(*Operands[Idx]);
+ ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[Idx + 1]);
+
+ const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID);
+ // Adjust only if Op1 and Op2 are GPRs.
+ if (Op1.isReg() && Op2.isReg() && MRC.contains(Op1.getReg()) &&
+ MRC.contains(Op2.getReg())) {
+ unsigned Reg1 = Op1.getReg();
+ unsigned Reg2 = Op2.getReg();
+ unsigned Rt = MRI->getEncodingValue(Reg1);
+ unsigned Rt2 = MRI->getEncodingValue(Reg2);
+
+ // Rt2 must be Rt + 1 and Rt must be even.
+ if (Rt + 1 != Rt2 || (Rt & 1)) {
+ Error(Op2.getStartLoc(), isLoad
+ ? "destination operands must be sequential"
+ : "source operands must be sequential");
+ return true;
+ }
+ unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
+ &(MRI->getRegClass(ARM::GPRPairRegClassID)));
+ Operands[Idx] =
+ ARMOperand::CreateReg(NewReg, Op1.getStartLoc(), Op2.getEndLoc());
+ Operands.erase(Operands.begin() + Idx + 1);
+ }
+ }
+
+ // GNU Assembler extension (compatibility)
+ if ((Mnemonic == "ldrd" || Mnemonic == "strd")) {
+ ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);
+ ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);
+ if (Op3.isMem()) {
+ assert(Op2.isReg() && "expected register argument");
+
+ unsigned SuperReg = MRI->getMatchingSuperReg(
+ Op2.getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID));
+
+ assert(SuperReg && "expected register pair");
+
+ unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1);
+
+ Operands.insert(
+ Operands.begin() + 3,
+ ARMOperand::CreateReg(PairedReg, Op2.getStartLoc(), Op2.getEndLoc()));
+ }
+ }
+
+ // FIXME: As said above, this is all a pretty gross hack. This instruction
+ // does not fit with other "subs" and tblgen.
+ // Adjust operands of B9.3.19 SUBS PC, LR, #imm (Thumb2) system instruction
+ // so the Mnemonic is the original name "subs" and delete the predicate
+ // operand so it will match the table entry.
+ if (isThumbTwo() && Mnemonic == "sub" && Operands.size() == 6 &&
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[3]).getReg() == ARM::PC &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).getReg() == ARM::LR &&
+ static_cast<ARMOperand &>(*Operands[5]).isImm()) {
+ Operands.front() = ARMOperand::CreateToken(Name, NameLoc);
+ Operands.erase(Operands.begin() + 1);
+ }
+ return false;
+}
+
+// Validate context-sensitive operand constraints.
+
+// return 'true' if register list contains non-low GPR registers,
+// 'false' otherwise. If Reg is in the register list or is HiReg, set
+// 'containsReg' to true.
+static bool checkLowRegisterList(const MCInst &Inst, unsigned OpNo,
+ unsigned Reg, unsigned HiReg,
+ bool &containsReg) {
+ containsReg = false;
+ for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+ unsigned OpReg = Inst.getOperand(i).getReg();
+ if (OpReg == Reg)
+ containsReg = true;
+ // Anything other than a low register isn't legal here.
+ if (!isARMLowRegister(OpReg) && (!HiReg || OpReg != HiReg))
+ return true;
+ }
+ return false;
+}
+
+// Check if the specified regisgter is in the register list of the inst,
+// starting at the indicated operand number.
+static bool listContainsReg(const MCInst &Inst, unsigned OpNo, unsigned Reg) {
+ for (unsigned i = OpNo, e = Inst.getNumOperands(); i < e; ++i) {
+ unsigned OpReg = Inst.getOperand(i).getReg();
+ if (OpReg == Reg)
+ return true;
+ }
+ return false;
+}
+
+// Return true if instruction has the interesting property of being
+// allowed in IT blocks, but not being predicable.
+static bool instIsBreakpoint(const MCInst &Inst) {
+ return Inst.getOpcode() == ARM::tBKPT ||
+ Inst.getOpcode() == ARM::BKPT ||
+ Inst.getOpcode() == ARM::tHLT ||
+ Inst.getOpcode() == ARM::HLT;
+
+}
+
+bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst,
+ const OperandVector &Operands,
+ unsigned ListNo, bool IsARPop) {
+ const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]);
+ bool HasWritebackToken = Op.isToken() && Op.getToken() == "!";
+
+ bool ListContainsSP = listContainsReg(Inst, ListNo, ARM::SP);
+ bool ListContainsLR = listContainsReg(Inst, ListNo, ARM::LR);
+ bool ListContainsPC = listContainsReg(Inst, ListNo, ARM::PC);
+
+ if (!IsARPop && ListContainsSP)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "SP may not be in the register list");
+ else if (ListContainsPC && ListContainsLR)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "PC and LR may not be in the register list simultaneously");
+ else if (inITBlock() && !lastInITBlock() && ListContainsPC)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "instruction must be outside of IT block or the last "
+ "instruction in an IT block");
+ return false;
+}
+
+bool ARMAsmParser::validatetSTMRegList(const MCInst &Inst,
+ const OperandVector &Operands,
+ unsigned ListNo) {
+ const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]);
+ bool HasWritebackToken = Op.isToken() && Op.getToken() == "!";
+
+ bool ListContainsSP = listContainsReg(Inst, ListNo, ARM::SP);
+ bool ListContainsPC = listContainsReg(Inst, ListNo, ARM::PC);
+
+ if (ListContainsSP && ListContainsPC)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "SP and PC may not be in the register list");
+ else if (ListContainsSP)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "SP may not be in the register list");
+ else if (ListContainsPC)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "PC may not be in the register list");
+ return false;
+}
+
+// FIXME: We would really like to be able to tablegen'erate this.
+bool ARMAsmParser::validateInstruction(MCInst &Inst,
+ const OperandVector &Operands) {
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+ SMLoc Loc = Operands[0]->getStartLoc();
+
+ // Check the IT block state first.
+ // NOTE: BKPT and HLT instructions have the interesting property of being
+ // allowed in IT blocks, but not being predicable. They just always execute.
+ if (inITBlock() && !instIsBreakpoint(Inst)) {
+ unsigned Bit = 1;
+ if (ITState.FirstCond)
+ ITState.FirstCond = false;
+ else
+ Bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
+ // The instruction must be predicable.
+ if (!MCID.isPredicable())
+ return Error(Loc, "instructions in IT block must be predicable");
+ unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm();
+ unsigned ITCond = Bit ? ITState.Cond :
+ ARMCC::getOppositeCondition(ITState.Cond);
+ if (Cond != ITCond) {
+ // Find the condition code Operand to get its SMLoc information.
+ SMLoc CondLoc;
+ for (unsigned I = 1; I < Operands.size(); ++I)
+ if (static_cast<ARMOperand &>(*Operands[I]).isCondCode())
+ CondLoc = Operands[I]->getStartLoc();
+ return Error(CondLoc, "incorrect condition in IT block; got '" +
+ StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
+ "', but expected '" +
+ ARMCondCodeToString(ARMCC::CondCodes(ITCond)) + "'");
+ }
+ // Check for non-'al' condition codes outside of the IT block.
+ } else if (isThumbTwo() && MCID.isPredicable() &&
+ Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
+ ARMCC::AL && Inst.getOpcode() != ARM::tBcc &&
+ Inst.getOpcode() != ARM::t2Bcc)
+ return Error(Loc, "predicated instructions must be in IT block");
+
+ const unsigned Opcode = Inst.getOpcode();
+ switch (Opcode) {
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST: {
+ const unsigned RtReg = Inst.getOperand(0).getReg();
+
+ // Rt can't be R14.
+ if (RtReg == ARM::LR)
+ return Error(Operands[3]->getStartLoc(),
+ "Rt can't be R14");
+
+ const unsigned Rt = MRI->getEncodingValue(RtReg);
+ // Rt must be even-numbered.
+ if ((Rt & 1) == 1)
+ return Error(Operands[3]->getStartLoc(),
+ "Rt must be even-numbered");
+
+ // Rt2 must be Rt + 1.
+ const unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "destination operands must be sequential");
+
+ if (Opcode == ARM::LDRD_PRE || Opcode == ARM::LDRD_POST) {
+ const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(3).getReg());
+ // For addressing modes with writeback, the base register needs to be
+ // different from the destination registers.
+ if (Rn == Rt || Rn == Rt2)
+ return Error(Operands[3]->getStartLoc(),
+ "base register needs to be different from destination "
+ "registers");
+ }
+
+ return false;
+ }
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRD_PRE:
+ case ARM::t2LDRD_POST: {
+ // Rt2 must be different from Rt.
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ if (Rt2 == Rt)
+ return Error(Operands[3]->getStartLoc(),
+ "destination operands can't be identical");
+ return false;
+ }
+ case ARM::t2BXJ: {
+ const unsigned RmReg = Inst.getOperand(0).getReg();
+ // Rm = SP is no longer unpredictable in v8-A
+ if (RmReg == ARM::SP && !hasV8Ops())
+ return Error(Operands[2]->getStartLoc(),
+ "r13 (SP) is an unpredictable operand to BXJ");
+ return false;
+ }
+ case ARM::STRD: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "source operands must be sequential");
+ return false;
+ }
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "source operands must be sequential");
+ return false;
+ }
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRB_PRE_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG: {
+ // Rt must be different from Rn.
+ const unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(2).getReg());
+
+ if (Rt == Rn)
+ return Error(Operands[3]->getStartLoc(),
+ "source register and base register can't be identical");
+ return false;
+ }
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDR_PRE_REG:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDRB_PRE_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST: {
+ // Rt must be different from Rn.
+ const unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(2).getReg());
+
+ if (Rt == Rn)
+ return Error(Operands[3]->getStartLoc(),
+ "destination register and base register can't be identical");
+ return false;
+ }
+ case ARM::SBFX:
+ case ARM::UBFX: {
+ // Width must be in range [1, 32-lsb].
+ unsigned LSB = Inst.getOperand(2).getImm();
+ unsigned Widthm1 = Inst.getOperand(3).getImm();
+ if (Widthm1 >= 32 - LSB)
+ return Error(Operands[5]->getStartLoc(),
+ "bitfield width must be in range [1,32-lsb]");
+ return false;
+ }
+ // Notionally handles ARM::tLDMIA_UPD too.
+ case ARM::tLDMIA: {
+ // If we're parsing Thumb2, the .w variant is available and handles
+ // most cases that are normally illegal for a Thumb1 LDM instruction.
+ // We'll make the transformation in processInstruction() if necessary.
+ //
+ // Thumb LDM instructions are writeback iff the base register is not
+ // in the register list.
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool HasWritebackToken =
+ (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == "!");
+ bool ListContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, ListContainsBase) && !isThumbTwo())
+ return Error(Operands[3 + HasWritebackToken]->getStartLoc(),
+ "registers must be in range r0-r7");
+ // If we should have writeback, then there should be a '!' token.
+ if (!ListContainsBase && !HasWritebackToken && !isThumbTwo())
+ return Error(Operands[2]->getStartLoc(),
+ "writeback operator '!' expected");
+ // If we should not have writeback, there must not be a '!'. This is
+ // true even for the 32-bit wide encodings.
+ if (ListContainsBase && HasWritebackToken)
+ return Error(Operands[3]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
+
+ if (validatetLDMRegList(Inst, Operands, 3))
+ return true;
+ break;
+ }
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::LDMDA_UPD:
+ // ARM variants loading and updating the same register are only officially
+ // UNPREDICTABLE on v7 upwards. Goodness knows what they did before.
+ if (!hasV7Ops())
+ break;
+ if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
+ return Error(Operands.back()->getStartLoc(),
+ "writeback register not allowed in register list");
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ if (validatetLDMRegList(Inst, Operands, 3))
+ return true;
+ break;
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ if (validatetSTMRegList(Inst, Operands, 3))
+ return true;
+ break;
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
+ return Error(Operands.back()->getStartLoc(),
+ "writeback register not allowed in register list");
+
+ if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
+ if (validatetLDMRegList(Inst, Operands, 3))
+ return true;
+ } else {
+ if (validatetSTMRegList(Inst, Operands, 3))
+ return true;
+ }
+ break;
+ }
+ case ARM::sysLDMIA_UPD:
+ case ARM::sysLDMDA_UPD:
+ case ARM::sysLDMDB_UPD:
+ case ARM::sysLDMIB_UPD:
+ if (!listContainsReg(Inst, 3, ARM::PC))
+ return Error(Operands[4]->getStartLoc(),
+ "writeback register only allowed on system LDM "
+ "if PC in register-list");
+ break;
+ case ARM::sysSTMIA_UPD:
+ case ARM::sysSTMDA_UPD:
+ case ARM::sysSTMDB_UPD:
+ case ARM::sysSTMIB_UPD:
+ return Error(Operands[2]->getStartLoc(),
+ "system STM cannot have writeback register");
+ case ARM::tMUL: {
+ // The second source operand must be the same register as the destination
+ // operand.
+ //
+ // In this case, we must directly check the parsed operands because the
+ // cvtThumbMultiply() function is written in such a way that it guarantees
+ // this first statement is always true for the new Inst. Essentially, the
+ // destination is unconditionally copied into the second source operand
+ // without checking to see if it matches what we actually parsed.
+ if (Operands.size() == 6 && (((ARMOperand &)*Operands[3]).getReg() !=
+ ((ARMOperand &)*Operands[5]).getReg()) &&
+ (((ARMOperand &)*Operands[3]).getReg() !=
+ ((ARMOperand &)*Operands[4]).getReg())) {
+ return Error(Operands[3]->getStartLoc(),
+ "destination register must match source register");
+ }
+ break;
+ }
+ // Like for ldm/stm, push and pop have hi-reg handling version in Thumb2,
+ // so only issue a diagnostic for thumb1. The instructions will be
+ // switched to the t2 encodings in processInstruction() if necessary.
+ case ARM::tPOP: {
+ bool ListContainsBase;
+ if (checkLowRegisterList(Inst, 2, 0, ARM::PC, ListContainsBase) &&
+ !isThumbTwo())
+ return Error(Operands[2]->getStartLoc(),
+ "registers must be in range r0-r7 or pc");
+ if (validatetLDMRegList(Inst, Operands, 2, !isMClass()))
+ return true;
+ break;
+ }
+ case ARM::tPUSH: {
+ bool ListContainsBase;
+ if (checkLowRegisterList(Inst, 2, 0, ARM::LR, ListContainsBase) &&
+ !isThumbTwo())
+ return Error(Operands[2]->getStartLoc(),
+ "registers must be in range r0-r7 or lr");
+ if (validatetSTMRegList(Inst, Operands, 2))
+ return true;
+ break;
+ }
+ case ARM::tSTMIA_UPD: {
+ bool ListContainsBase, InvalidLowList;
+ InvalidLowList = checkLowRegisterList(Inst, 4, Inst.getOperand(0).getReg(),
+ 0, ListContainsBase);
+ if (InvalidLowList && !isThumbTwo())
+ return Error(Operands[4]->getStartLoc(),
+ "registers must be in range r0-r7");
+
+ // This would be converted to a 32-bit stm, but that's not valid if the
+ // writeback register is in the list.
+ if (InvalidLowList && ListContainsBase)
+ return Error(Operands[4]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
+
+ if (validatetSTMRegList(Inst, Operands, 4))
+ return true;
+ break;
+ }
+ case ARM::tADDrSP: {
+ // If the non-SP source operand and the destination operand are not the
+ // same, we need thumb2 (for the wide encoding), or we have an error.
+ if (!isThumbTwo() &&
+ Inst.getOperand(0).getReg() != Inst.getOperand(2).getReg()) {
+ return Error(Operands[4]->getStartLoc(),
+ "source register must be the same as destination");
+ }
+ break;
+ }
+ // Final range checking for Thumb unconditional branch instructions.
+ case ARM::tB:
+ if (!(static_cast<ARMOperand &>(*Operands[2])).isSignedOffset<11, 1>())
+ return Error(Operands[2]->getStartLoc(), "branch target out of range");
+ break;
+ case ARM::t2B: {
+ int op = (Operands[2]->isImm()) ? 2 : 3;
+ if (!static_cast<ARMOperand &>(*Operands[op]).isSignedOffset<24, 1>())
+ return Error(Operands[op]->getStartLoc(), "branch target out of range");
+ break;
+ }
+ // Final range checking for Thumb conditional branch instructions.
+ case ARM::tBcc:
+ if (!static_cast<ARMOperand &>(*Operands[2]).isSignedOffset<8, 1>())
+ return Error(Operands[2]->getStartLoc(), "branch target out of range");
+ break;
+ case ARM::t2Bcc: {
+ int Op = (Operands[2]->isImm()) ? 2 : 3;
+ if (!static_cast<ARMOperand &>(*Operands[Op]).isSignedOffset<20, 1>())
+ return Error(Operands[Op]->getStartLoc(), "branch target out of range");
+ break;
+ }
+ case ARM::MOVi16:
+ case ARM::t2MOVi16:
+ case ARM::t2MOVTi16:
+ {
+ // We want to avoid misleadingly allowing something like "mov r0, <symbol>"
+ // especially when we turn it into a movw and the expression <symbol> does
+ // not have a :lower16: or :upper16 as part of the expression. We don't
+ // want the behavior of silently truncating, which can be unexpected and
+ // lead to bugs that are difficult to find since this is an easy mistake
+ // to make.
+ int i = (Operands[3]->isImm()) ? 3 : 4;
+ ARMOperand &Op = static_cast<ARMOperand &>(*Operands[i]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm());
+ if (CE) break;
+ const MCExpr *E = dyn_cast<MCExpr>(Op.getImm());
+ if (!E) break;
+ const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(E);
+ if (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 &&
+ ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16))
+ return Error(
+ Op.getStartLoc(),
+ "immediate expression for mov requires :lower16: or :upper16");
+ break;
+ }
+ }
+
+ return false;
+}
+
+static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) {
+ switch(Opc) {
+ default: llvm_unreachable("unexpected opcode!");
+ // VST1LN
+ case ARM::VST1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdWB_register_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_register_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_register_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdAsm_8: Spacing = 1; return ARM::VST1LNd8;
+ case ARM::VST1LNdAsm_16: Spacing = 1; return ARM::VST1LNd16;
+ case ARM::VST1LNdAsm_32: Spacing = 1; return ARM::VST1LNd32;
+
+ // VST2LN
+ case ARM::VST2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD;
+ case ARM::VST2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD;
+
+ case ARM::VST2LNdWB_register_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_register_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_register_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNqWB_register_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD;
+ case ARM::VST2LNqWB_register_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD;
+
+ case ARM::VST2LNdAsm_8: Spacing = 1; return ARM::VST2LNd8;
+ case ARM::VST2LNdAsm_16: Spacing = 1; return ARM::VST2LNd16;
+ case ARM::VST2LNdAsm_32: Spacing = 1; return ARM::VST2LNd32;
+ case ARM::VST2LNqAsm_16: Spacing = 2; return ARM::VST2LNq16;
+ case ARM::VST2LNqAsm_32: Spacing = 2; return ARM::VST2LNq32;
+
+ // VST3LN
+ case ARM::VST3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD;
+ case ARM::VST3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD;
+ case ARM::VST3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD;
+ case ARM::VST3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNq16_UPD;
+ case ARM::VST3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD;
+ case ARM::VST3LNdWB_register_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD;
+ case ARM::VST3LNdWB_register_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD;
+ case ARM::VST3LNdWB_register_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD;
+ case ARM::VST3LNqWB_register_Asm_16: Spacing = 2; return ARM::VST3LNq16_UPD;
+ case ARM::VST3LNqWB_register_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD;
+ case ARM::VST3LNdAsm_8: Spacing = 1; return ARM::VST3LNd8;
+ case ARM::VST3LNdAsm_16: Spacing = 1; return ARM::VST3LNd16;
+ case ARM::VST3LNdAsm_32: Spacing = 1; return ARM::VST3LNd32;
+ case ARM::VST3LNqAsm_16: Spacing = 2; return ARM::VST3LNq16;
+ case ARM::VST3LNqAsm_32: Spacing = 2; return ARM::VST3LNq32;
+
+ // VST3
+ case ARM::VST3dWB_fixed_Asm_8: Spacing = 1; return ARM::VST3d8_UPD;
+ case ARM::VST3dWB_fixed_Asm_16: Spacing = 1; return ARM::VST3d16_UPD;
+ case ARM::VST3dWB_fixed_Asm_32: Spacing = 1; return ARM::VST3d32_UPD;
+ case ARM::VST3qWB_fixed_Asm_8: Spacing = 2; return ARM::VST3q8_UPD;
+ case ARM::VST3qWB_fixed_Asm_16: Spacing = 2; return ARM::VST3q16_UPD;
+ case ARM::VST3qWB_fixed_Asm_32: Spacing = 2; return ARM::VST3q32_UPD;
+ case ARM::VST3dWB_register_Asm_8: Spacing = 1; return ARM::VST3d8_UPD;
+ case ARM::VST3dWB_register_Asm_16: Spacing = 1; return ARM::VST3d16_UPD;
+ case ARM::VST3dWB_register_Asm_32: Spacing = 1; return ARM::VST3d32_UPD;
+ case ARM::VST3qWB_register_Asm_8: Spacing = 2; return ARM::VST3q8_UPD;
+ case ARM::VST3qWB_register_Asm_16: Spacing = 2; return ARM::VST3q16_UPD;
+ case ARM::VST3qWB_register_Asm_32: Spacing = 2; return ARM::VST3q32_UPD;
+ case ARM::VST3dAsm_8: Spacing = 1; return ARM::VST3d8;
+ case ARM::VST3dAsm_16: Spacing = 1; return ARM::VST3d16;
+ case ARM::VST3dAsm_32: Spacing = 1; return ARM::VST3d32;
+ case ARM::VST3qAsm_8: Spacing = 2; return ARM::VST3q8;
+ case ARM::VST3qAsm_16: Spacing = 2; return ARM::VST3q16;
+ case ARM::VST3qAsm_32: Spacing = 2; return ARM::VST3q32;
+
+ // VST4LN
+ case ARM::VST4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD;
+ case ARM::VST4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+ case ARM::VST4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+ case ARM::VST4LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNq16_UPD;
+ case ARM::VST4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+ case ARM::VST4LNdWB_register_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD;
+ case ARM::VST4LNdWB_register_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+ case ARM::VST4LNdWB_register_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+ case ARM::VST4LNqWB_register_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD;
+ case ARM::VST4LNqWB_register_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+ case ARM::VST4LNdAsm_8: Spacing = 1; return ARM::VST4LNd8;
+ case ARM::VST4LNdAsm_16: Spacing = 1; return ARM::VST4LNd16;
+ case ARM::VST4LNdAsm_32: Spacing = 1; return ARM::VST4LNd32;
+ case ARM::VST4LNqAsm_16: Spacing = 2; return ARM::VST4LNq16;
+ case ARM::VST4LNqAsm_32: Spacing = 2; return ARM::VST4LNq32;
+
+ // VST4
+ case ARM::VST4dWB_fixed_Asm_8: Spacing = 1; return ARM::VST4d8_UPD;
+ case ARM::VST4dWB_fixed_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
+ case ARM::VST4dWB_fixed_Asm_32: Spacing = 1; return ARM::VST4d32_UPD;
+ case ARM::VST4qWB_fixed_Asm_8: Spacing = 2; return ARM::VST4q8_UPD;
+ case ARM::VST4qWB_fixed_Asm_16: Spacing = 2; return ARM::VST4q16_UPD;
+ case ARM::VST4qWB_fixed_Asm_32: Spacing = 2; return ARM::VST4q32_UPD;
+ case ARM::VST4dWB_register_Asm_8: Spacing = 1; return ARM::VST4d8_UPD;
+ case ARM::VST4dWB_register_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
+ case ARM::VST4dWB_register_Asm_32: Spacing = 1; return ARM::VST4d32_UPD;
+ case ARM::VST4qWB_register_Asm_8: Spacing = 2; return ARM::VST4q8_UPD;
+ case ARM::VST4qWB_register_Asm_16: Spacing = 2; return ARM::VST4q16_UPD;
+ case ARM::VST4qWB_register_Asm_32: Spacing = 2; return ARM::VST4q32_UPD;
+ case ARM::VST4dAsm_8: Spacing = 1; return ARM::VST4d8;
+ case ARM::VST4dAsm_16: Spacing = 1; return ARM::VST4d16;
+ case ARM::VST4dAsm_32: Spacing = 1; return ARM::VST4d32;
+ case ARM::VST4qAsm_8: Spacing = 2; return ARM::VST4q8;
+ case ARM::VST4qAsm_16: Spacing = 2; return ARM::VST4q16;
+ case ARM::VST4qAsm_32: Spacing = 2; return ARM::VST4q32;
+ }
+}
+
+static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
+ switch(Opc) {
+ default: llvm_unreachable("unexpected opcode!");
+ // VLD1LN
+ case ARM::VLD1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdAsm_8: Spacing = 1; return ARM::VLD1LNd8;
+ case ARM::VLD1LNdAsm_16: Spacing = 1; return ARM::VLD1LNd16;
+ case ARM::VLD1LNdAsm_32: Spacing = 1; return ARM::VLD1LNd32;
+
+ // VLD2LN
+ case ARM::VLD2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNq16_UPD;
+ case ARM::VLD2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD;
+ case ARM::VLD2LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD;
+ case ARM::VLD2LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD;
+ case ARM::VLD2LNdAsm_8: Spacing = 1; return ARM::VLD2LNd8;
+ case ARM::VLD2LNdAsm_16: Spacing = 1; return ARM::VLD2LNd16;
+ case ARM::VLD2LNdAsm_32: Spacing = 1; return ARM::VLD2LNd32;
+ case ARM::VLD2LNqAsm_16: Spacing = 2; return ARM::VLD2LNq16;
+ case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32;
+
+ // VLD3DUP
+ case ARM::VLD3DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD;
+ case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
+ case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPq8_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD3DUPq8_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
+ case ARM::VLD3DUPdAsm_8: Spacing = 1; return ARM::VLD3DUPd8;
+ case ARM::VLD3DUPdAsm_16: Spacing = 1; return ARM::VLD3DUPd16;
+ case ARM::VLD3DUPdAsm_32: Spacing = 1; return ARM::VLD3DUPd32;
+ case ARM::VLD3DUPqAsm_8: Spacing = 2; return ARM::VLD3DUPq8;
+ case ARM::VLD3DUPqAsm_16: Spacing = 2; return ARM::VLD3DUPq16;
+ case ARM::VLD3DUPqAsm_32: Spacing = 2; return ARM::VLD3DUPq32;
+
+ // VLD3LN
+ case ARM::VLD3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD;
+ case ARM::VLD3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD;
+ case ARM::VLD3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD;
+ case ARM::VLD3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNq16_UPD;
+ case ARM::VLD3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD;
+ case ARM::VLD3LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD;
+ case ARM::VLD3LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD;
+ case ARM::VLD3LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD;
+ case ARM::VLD3LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD3LNq16_UPD;
+ case ARM::VLD3LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD;
+ case ARM::VLD3LNdAsm_8: Spacing = 1; return ARM::VLD3LNd8;
+ case ARM::VLD3LNdAsm_16: Spacing = 1; return ARM::VLD3LNd16;
+ case ARM::VLD3LNdAsm_32: Spacing = 1; return ARM::VLD3LNd32;
+ case ARM::VLD3LNqAsm_16: Spacing = 2; return ARM::VLD3LNq16;
+ case ARM::VLD3LNqAsm_32: Spacing = 2; return ARM::VLD3LNq32;
+
+ // VLD3
+ case ARM::VLD3dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD;
+ case ARM::VLD3dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD;
+ case ARM::VLD3dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD;
+ case ARM::VLD3qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD;
+ case ARM::VLD3qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD;
+ case ARM::VLD3qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD;
+ case ARM::VLD3dWB_register_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD;
+ case ARM::VLD3dWB_register_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD;
+ case ARM::VLD3dWB_register_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD;
+ case ARM::VLD3qWB_register_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD;
+ case ARM::VLD3qWB_register_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD;
+ case ARM::VLD3qWB_register_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD;
+ case ARM::VLD3dAsm_8: Spacing = 1; return ARM::VLD3d8;
+ case ARM::VLD3dAsm_16: Spacing = 1; return ARM::VLD3d16;
+ case ARM::VLD3dAsm_32: Spacing = 1; return ARM::VLD3d32;
+ case ARM::VLD3qAsm_8: Spacing = 2; return ARM::VLD3q8;
+ case ARM::VLD3qAsm_16: Spacing = 2; return ARM::VLD3q16;
+ case ARM::VLD3qAsm_32: Spacing = 2; return ARM::VLD3q32;
+
+ // VLD4LN
+ case ARM::VLD4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD;
+ case ARM::VLD4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD;
+ case ARM::VLD4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD;
+ case ARM::VLD4LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD;
+ case ARM::VLD4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD;
+ case ARM::VLD4LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD;
+ case ARM::VLD4LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD;
+ case ARM::VLD4LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD;
+ case ARM::VLD4LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD;
+ case ARM::VLD4LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD;
+ case ARM::VLD4LNdAsm_8: Spacing = 1; return ARM::VLD4LNd8;
+ case ARM::VLD4LNdAsm_16: Spacing = 1; return ARM::VLD4LNd16;
+ case ARM::VLD4LNdAsm_32: Spacing = 1; return ARM::VLD4LNd32;
+ case ARM::VLD4LNqAsm_16: Spacing = 2; return ARM::VLD4LNq16;
+ case ARM::VLD4LNqAsm_32: Spacing = 2; return ARM::VLD4LNq32;
+
+ // VLD4DUP
+ case ARM::VLD4DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD;
+ case ARM::VLD4DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD;
+ case ARM::VLD4DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPq8_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPq16_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD4DUPq8_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD;
+ case ARM::VLD4DUPdAsm_8: Spacing = 1; return ARM::VLD4DUPd8;
+ case ARM::VLD4DUPdAsm_16: Spacing = 1; return ARM::VLD4DUPd16;
+ case ARM::VLD4DUPdAsm_32: Spacing = 1; return ARM::VLD4DUPd32;
+ case ARM::VLD4DUPqAsm_8: Spacing = 2; return ARM::VLD4DUPq8;
+ case ARM::VLD4DUPqAsm_16: Spacing = 2; return ARM::VLD4DUPq16;
+ case ARM::VLD4DUPqAsm_32: Spacing = 2; return ARM::VLD4DUPq32;
+
+ // VLD4
+ case ARM::VLD4dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD;
+ case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD;
+ case ARM::VLD4dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD;
+ case ARM::VLD4qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD;
+ case ARM::VLD4qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD;
+ case ARM::VLD4qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD;
+ case ARM::VLD4dWB_register_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD;
+ case ARM::VLD4dWB_register_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD;
+ case ARM::VLD4dWB_register_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD;
+ case ARM::VLD4qWB_register_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD;
+ case ARM::VLD4qWB_register_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD;
+ case ARM::VLD4qWB_register_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD;
+ case ARM::VLD4dAsm_8: Spacing = 1; return ARM::VLD4d8;
+ case ARM::VLD4dAsm_16: Spacing = 1; return ARM::VLD4d16;
+ case ARM::VLD4dAsm_32: Spacing = 1; return ARM::VLD4d32;
+ case ARM::VLD4qAsm_8: Spacing = 2; return ARM::VLD4q8;
+ case ARM::VLD4qAsm_16: Spacing = 2; return ARM::VLD4q16;
+ case ARM::VLD4qAsm_32: Spacing = 2; return ARM::VLD4q32;
+ }
+}
+
+bool ARMAsmParser::processInstruction(MCInst &Inst,
+ const OperandVector &Operands,
+ MCStreamer &Out) {
+ switch (Inst.getOpcode()) {
+ // Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
+ case ARM::LDRT_POST:
+ case ARM::LDRBT_POST: {
+ const unsigned Opcode =
+ (Inst.getOpcode() == ARM::LDRT_POST) ? ARM::LDRT_POST_IMM
+ : ARM::LDRBT_POST_IMM;
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createReg(0));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ // Alias for alternate form of 'str{,b}t Rt, [Rn], #imm' instruction.
+ case ARM::STRT_POST:
+ case ARM::STRBT_POST: {
+ const unsigned Opcode =
+ (Inst.getOpcode() == ARM::STRT_POST) ? ARM::STRT_POST_IMM
+ : ARM::STRBT_POST_IMM;
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode);
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createReg(0));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ // Alias for alternate form of 'ADR Rd, #imm' instruction.
+ case ARM::ADDri: {
+ if (Inst.getOperand(1).getReg() != ARM::PC ||
+ Inst.getOperand(5).getReg() != 0 ||
+ !(Inst.getOperand(2).isExpr() || Inst.getOperand(2).isImm()))
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADR);
+ TmpInst.addOperand(Inst.getOperand(0));
+ if (Inst.getOperand(2).isImm()) {
+ // Immediate (mod_imm) will be in its encoded form, we must unencode it
+ // before passing it to the ADR instruction.
+ unsigned Enc = Inst.getOperand(2).getImm();
+ TmpInst.addOperand(MCOperand::createImm(
+ ARM_AM::rotr32(Enc & 0xFF, (Enc & 0xF00) >> 7)));
+ } else {
+ // Turn PC-relative expression into absolute expression.
+ // Reading PC provides the start of the current instruction + 8 and
+ // the transform to adr is biased by that.
+ MCSymbol *Dot = getContext().createTempSymbol();
+ Out.EmitLabel(Dot);
+ const MCExpr *OpExpr = Inst.getOperand(2).getExpr();
+ const MCExpr *InstPC = MCSymbolRefExpr::create(Dot,
+ MCSymbolRefExpr::VK_None,
+ getContext());
+ const MCExpr *Const8 = MCConstantExpr::create(8, getContext());
+ const MCExpr *ReadPC = MCBinaryExpr::createAdd(InstPC, Const8,
+ getContext());
+ const MCExpr *FixupAddr = MCBinaryExpr::createAdd(ReadPC, OpExpr,
+ getContext());
+ TmpInst.addOperand(MCOperand::createExpr(FixupAddr));
+ }
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ // Aliases for alternate PC+imm syntax of LDR instructions.
+ case ARM::t2LDRpcrel:
+ // Select the narrow version if the immediate will fit.
+ if (Inst.getOperand(1).getImm() > 0 &&
+ Inst.getOperand(1).getImm() <= 0xff &&
+ !(static_cast<ARMOperand &>(*Operands[2]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w"))
+ Inst.setOpcode(ARM::tLDRpci);
+ else
+ Inst.setOpcode(ARM::t2LDRpci);
+ return true;
+ case ARM::t2LDRBpcrel:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ return true;
+ case ARM::t2LDRHpcrel:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ return true;
+ case ARM::t2LDRSBpcrel:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ return true;
+ case ARM::t2LDRSHpcrel:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ return true;
+ // Handle NEON VST complex aliases.
+ case ARM::VST1LNdWB_register_Asm_8:
+ case ARM::VST1LNdWB_register_Asm_16:
+ case ARM::VST1LNdWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_register_Asm_8:
+ case ARM::VST2LNdWB_register_Asm_16:
+ case ARM::VST2LNdWB_register_Asm_32:
+ case ARM::VST2LNqWB_register_Asm_16:
+ case ARM::VST2LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdWB_register_Asm_8:
+ case ARM::VST3LNdWB_register_Asm_16:
+ case ARM::VST3LNdWB_register_Asm_32:
+ case ARM::VST3LNqWB_register_Asm_16:
+ case ARM::VST3LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdWB_register_Asm_8:
+ case ARM::VST4LNdWB_register_Asm_16:
+ case ARM::VST4LNdWB_register_Asm_32:
+ case ARM::VST4LNqWB_register_Asm_16:
+ case ARM::VST4LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST1LNdWB_fixed_Asm_8:
+ case ARM::VST1LNdWB_fixed_Asm_16:
+ case ARM::VST1LNdWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_fixed_Asm_8:
+ case ARM::VST2LNdWB_fixed_Asm_16:
+ case ARM::VST2LNdWB_fixed_Asm_32:
+ case ARM::VST2LNqWB_fixed_Asm_16:
+ case ARM::VST2LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdWB_fixed_Asm_8:
+ case ARM::VST3LNdWB_fixed_Asm_16:
+ case ARM::VST3LNdWB_fixed_Asm_32:
+ case ARM::VST3LNqWB_fixed_Asm_16:
+ case ARM::VST3LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdWB_fixed_Asm_8:
+ case ARM::VST4LNdWB_fixed_Asm_16:
+ case ARM::VST4LNdWB_fixed_Asm_32:
+ case ARM::VST4LNqWB_fixed_Asm_16:
+ case ARM::VST4LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST1LNdAsm_8:
+ case ARM::VST1LNdAsm_16:
+ case ARM::VST1LNdAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdAsm_8:
+ case ARM::VST2LNdAsm_16:
+ case ARM::VST2LNdAsm_32:
+ case ARM::VST2LNqAsm_16:
+ case ARM::VST2LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdAsm_8:
+ case ARM::VST3LNdAsm_16:
+ case ARM::VST3LNdAsm_32:
+ case ARM::VST3LNqAsm_16:
+ case ARM::VST3LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdAsm_8:
+ case ARM::VST4LNdAsm_16:
+ case ARM::VST4LNdAsm_32:
+ case ARM::VST4LNqAsm_16:
+ case ARM::VST4LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // Handle NEON VLD complex aliases.
+ case ARM::VLD1LNdWB_register_Asm_8:
+ case ARM::VLD1LNdWB_register_Asm_16:
+ case ARM::VLD1LNdWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_register_Asm_8:
+ case ARM::VLD2LNdWB_register_Asm_16:
+ case ARM::VLD2LNdWB_register_Asm_32:
+ case ARM::VLD2LNqWB_register_Asm_16:
+ case ARM::VLD2LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdWB_register_Asm_8:
+ case ARM::VLD3LNdWB_register_Asm_16:
+ case ARM::VLD3LNdWB_register_Asm_32:
+ case ARM::VLD3LNqWB_register_Asm_16:
+ case ARM::VLD3LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdWB_register_Asm_8:
+ case ARM::VLD4LNdWB_register_Asm_16:
+ case ARM::VLD4LNdWB_register_Asm_32:
+ case ARM::VLD4LNqWB_register_Asm_16:
+ case ARM::VLD4LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdWB_fixed_Asm_8:
+ case ARM::VLD1LNdWB_fixed_Asm_16:
+ case ARM::VLD1LNdWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_fixed_Asm_8:
+ case ARM::VLD2LNdWB_fixed_Asm_16:
+ case ARM::VLD2LNdWB_fixed_Asm_32:
+ case ARM::VLD2LNqWB_fixed_Asm_16:
+ case ARM::VLD2LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdWB_fixed_Asm_8:
+ case ARM::VLD3LNdWB_fixed_Asm_16:
+ case ARM::VLD3LNdWB_fixed_Asm_32:
+ case ARM::VLD3LNqWB_fixed_Asm_16:
+ case ARM::VLD3LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdWB_fixed_Asm_8:
+ case ARM::VLD4LNdWB_fixed_Asm_16:
+ case ARM::VLD4LNdWB_fixed_Asm_32:
+ case ARM::VLD4LNqWB_fixed_Asm_16:
+ case ARM::VLD4LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdAsm_8:
+ case ARM::VLD1LNdAsm_16:
+ case ARM::VLD1LNdAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdAsm_8:
+ case ARM::VLD2LNdAsm_16:
+ case ARM::VLD2LNdAsm_32:
+ case ARM::VLD2LNqAsm_16:
+ case ARM::VLD2LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdAsm_8:
+ case ARM::VLD3LNdAsm_16:
+ case ARM::VLD3LNdAsm_32:
+ case ARM::VLD3LNqAsm_16:
+ case ARM::VLD3LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdAsm_8:
+ case ARM::VLD4LNdAsm_16:
+ case ARM::VLD4LNdAsm_32:
+ case ARM::VLD4LNqAsm_16:
+ case ARM::VLD4LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD3DUP single 3-element structure to all lanes instructions.
+ case ARM::VLD3DUPdAsm_8:
+ case ARM::VLD3DUPdAsm_16:
+ case ARM::VLD3DUPdAsm_32:
+ case ARM::VLD3DUPqAsm_8:
+ case ARM::VLD3DUPqAsm_16:
+ case ARM::VLD3DUPqAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3DUPdWB_fixed_Asm_8:
+ case ARM::VLD3DUPdWB_fixed_Asm_16:
+ case ARM::VLD3DUPdWB_fixed_Asm_32:
+ case ARM::VLD3DUPqWB_fixed_Asm_8:
+ case ARM::VLD3DUPqWB_fixed_Asm_16:
+ case ARM::VLD3DUPqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3DUPdWB_register_Asm_8:
+ case ARM::VLD3DUPdWB_register_Asm_16:
+ case ARM::VLD3DUPdWB_register_Asm_32:
+ case ARM::VLD3DUPqWB_register_Asm_8:
+ case ARM::VLD3DUPqWB_register_Asm_16:
+ case ARM::VLD3DUPqWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD3 multiple 3-element structure instructions.
+ case ARM::VLD3dAsm_8:
+ case ARM::VLD3dAsm_16:
+ case ARM::VLD3dAsm_32:
+ case ARM::VLD3qAsm_8:
+ case ARM::VLD3qAsm_16:
+ case ARM::VLD3qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3dWB_fixed_Asm_8:
+ case ARM::VLD3dWB_fixed_Asm_16:
+ case ARM::VLD3dWB_fixed_Asm_32:
+ case ARM::VLD3qWB_fixed_Asm_8:
+ case ARM::VLD3qWB_fixed_Asm_16:
+ case ARM::VLD3qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3dWB_register_Asm_8:
+ case ARM::VLD3dWB_register_Asm_16:
+ case ARM::VLD3dWB_register_Asm_32:
+ case ARM::VLD3qWB_register_Asm_8:
+ case ARM::VLD3qWB_register_Asm_16:
+ case ARM::VLD3qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD4DUP single 3-element structure to all lanes instructions.
+ case ARM::VLD4DUPdAsm_8:
+ case ARM::VLD4DUPdAsm_16:
+ case ARM::VLD4DUPdAsm_32:
+ case ARM::VLD4DUPqAsm_8:
+ case ARM::VLD4DUPqAsm_16:
+ case ARM::VLD4DUPqAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4DUPdWB_fixed_Asm_8:
+ case ARM::VLD4DUPdWB_fixed_Asm_16:
+ case ARM::VLD4DUPdWB_fixed_Asm_32:
+ case ARM::VLD4DUPqWB_fixed_Asm_8:
+ case ARM::VLD4DUPqWB_fixed_Asm_16:
+ case ARM::VLD4DUPqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4DUPdWB_register_Asm_8:
+ case ARM::VLD4DUPdWB_register_Asm_16:
+ case ARM::VLD4DUPdWB_register_Asm_32:
+ case ARM::VLD4DUPqWB_register_Asm_8:
+ case ARM::VLD4DUPqWB_register_Asm_16:
+ case ARM::VLD4DUPqWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD4 multiple 4-element structure instructions.
+ case ARM::VLD4dAsm_8:
+ case ARM::VLD4dAsm_16:
+ case ARM::VLD4dAsm_32:
+ case ARM::VLD4qAsm_8:
+ case ARM::VLD4qAsm_16:
+ case ARM::VLD4qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4dWB_fixed_Asm_8:
+ case ARM::VLD4dWB_fixed_Asm_16:
+ case ARM::VLD4dWB_fixed_Asm_32:
+ case ARM::VLD4qWB_fixed_Asm_8:
+ case ARM::VLD4qWB_fixed_Asm_16:
+ case ARM::VLD4qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4dWB_register_Asm_8:
+ case ARM::VLD4dWB_register_Asm_16:
+ case ARM::VLD4dWB_register_Asm_32:
+ case ARM::VLD4qWB_register_Asm_8:
+ case ARM::VLD4qWB_register_Asm_16:
+ case ARM::VLD4qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VST3 multiple 3-element structure instructions.
+ case ARM::VST3dAsm_8:
+ case ARM::VST3dAsm_16:
+ case ARM::VST3dAsm_32:
+ case ARM::VST3qAsm_8:
+ case ARM::VST3qAsm_16:
+ case ARM::VST3qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3dWB_fixed_Asm_8:
+ case ARM::VST3dWB_fixed_Asm_16:
+ case ARM::VST3dWB_fixed_Asm_32:
+ case ARM::VST3qWB_fixed_Asm_8:
+ case ARM::VST3qWB_fixed_Asm_16:
+ case ARM::VST3qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3dWB_register_Asm_8:
+ case ARM::VST3dWB_register_Asm_16:
+ case ARM::VST3dWB_register_Asm_32:
+ case ARM::VST3qWB_register_Asm_8:
+ case ARM::VST3qWB_register_Asm_16:
+ case ARM::VST3qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VST4 multiple 3-element structure instructions.
+ case ARM::VST4dAsm_8:
+ case ARM::VST4dAsm_16:
+ case ARM::VST4dAsm_32:
+ case ARM::VST4qAsm_8:
+ case ARM::VST4qAsm_16:
+ case ARM::VST4qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4dWB_fixed_Asm_8:
+ case ARM::VST4dWB_fixed_Asm_16:
+ case ARM::VST4dWB_fixed_Asm_32:
+ case ARM::VST4qWB_fixed_Asm_8:
+ case ARM::VST4qWB_fixed_Asm_16:
+ case ARM::VST4qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4dWB_register_Asm_8:
+ case ARM::VST4dWB_register_Asm_16:
+ case ARM::VST4dWB_register_Asm_32:
+ case ARM::VST4qWB_register_Asm_8:
+ case ARM::VST4qWB_register_Asm_16:
+ case ARM::VST4qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // Handle encoding choice for the shift-immediate instructions.
+ case ARM::t2LSLri:
+ case ARM::t2LSRri:
+ case ARM::t2ASRri: {
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !(static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2LSLri: NewOpc = ARM::tLSLri; break;
+ case ARM::t2LSRri: NewOpc = ARM::tLSRri; break;
+ case ARM::t2ASRri: NewOpc = ARM::tASRri; break;
+ }
+ // The Thumb1 operands aren't in the same order. Awesome, eh?
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+
+ // Handle the Thumb2 mode MOV complex aliases.
+ case ARM::t2MOVsr:
+ case ARM::t2MOVSsr: {
+ // Which instruction to expand to depends on the CCOut operand and
+ // whether we're in an IT block if the register operands are low
+ // registers.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(3).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRrr : ARM::t2ASRrr; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRrr : ARM::t2LSRrr; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLrr : ARM::t2LSLrr; break;
+ case ARM_AM::ror: newOpc = isNarrow ? ARM::tROR : ARM::t2RORrr; break;
+ }
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::createReg(
+ Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::createReg(
+ Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2MOVsi:
+ case ARM::t2MOVSsi: {
+ // Which instruction to expand to depends on the CCOut operand and
+ // whether we're in an IT block if the register operands are low
+ // registers.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
+ }
+ unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ if (Amount == 32) Amount = 0;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::createReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ if (newOpc != ARM::t2RRX)
+ TmpInst.addOperand(MCOperand::createImm(Amount));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::createReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the ARM mode MOV complex aliases.
+ case ARM::ASRr:
+ case ARM::LSRr:
+ case ARM::LSLr:
+ case ARM::RORr: {
+ ARM_AM::ShiftOpc ShiftTy;
+ switch(Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ASRr: ShiftTy = ARM_AM::asr; break;
+ case ARM::LSRr: ShiftTy = ARM_AM::lsr; break;
+ case ARM::LSLr: ShiftTy = ARM_AM::lsl; break;
+ case ARM::RORr: ShiftTy = ARM_AM::ror; break;
+ }
+ unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, 0);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVsr);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // Rm
+ TmpInst.addOperand(MCOperand::createImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::ASRi:
+ case ARM::LSRi:
+ case ARM::LSLi:
+ case ARM::RORi: {
+ ARM_AM::ShiftOpc ShiftTy;
+ switch(Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ASRi: ShiftTy = ARM_AM::asr; break;
+ case ARM::LSRi: ShiftTy = ARM_AM::lsr; break;
+ case ARM::LSLi: ShiftTy = ARM_AM::lsl; break;
+ case ARM::RORi: ShiftTy = ARM_AM::ror; break;
+ }
+ // A shift by zero is a plain MOVr, not a MOVsi.
+ unsigned Amt = Inst.getOperand(2).getImm();
+ unsigned Opc = Amt == 0 ? ARM::MOVr : ARM::MOVsi;
+ // A shift by 32 should be encoded as 0 when permitted
+ if (Amt == 32 && (ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr))
+ Amt = 0;
+ unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt);
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ if (Opc == ARM::MOVsi)
+ TmpInst.addOperand(MCOperand::createImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::RRXi: {
+ unsigned Shifter = ARM_AM::getSORegOpc(ARM_AM::rrx, 0);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVsi);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::createImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2LDMIA_UPD: {
+ // If this is a load of a single register, then we should use
+ // a post-indexed LDR instruction instead, per the ARM ARM.
+ if (Inst.getNumOperands() != 5)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2LDR_POST);
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::createImm(4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2STMDB_UPD: {
+ // If this is a store of a single register, then we should use
+ // a pre-indexed STR instruction instead, per the ARM ARM.
+ if (Inst.getNumOperands() != 5)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2STR_PRE);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::createImm(-4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::LDMIA_UPD:
+ // If this is a load of a single register via a 'pop', then we should use
+ // a post-indexed LDR instruction instead, per the ARM ARM.
+ if (static_cast<ARMOperand &>(*Operands[0]).getToken() == "pop" &&
+ Inst.getNumOperands() == 5) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDR_POST_IMM);
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::createReg(0)); // am2offset
+ TmpInst.addOperand(MCOperand::createImm(4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ break;
+ case ARM::STMDB_UPD:
+ // If this is a store of a single register via a 'push', then we should use
+ // a pre-indexed STR instruction instead, per the ARM ARM.
+ if (static_cast<ARMOperand &>(*Operands[0]).getToken() == "push" &&
+ Inst.getNumOperands() == 5) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::STR_PRE_IMM);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // addrmode_imm12
+ TmpInst.addOperand(MCOperand::createImm(-4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ case ARM::t2ADDri12:
+ // If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
+ // mnemonic was used (not "addw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "add" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2ADDri);
+ Inst.addOperand(MCOperand::createReg(0)); // cc_out
+ break;
+ case ARM::t2SUBri12:
+ // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
+ // mnemonic was used (not "subw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "sub" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2SUBri);
+ Inst.addOperand(MCOperand::createReg(0)); // cc_out
+ break;
+ case ARM::tADDi8:
+ // If the immediate is in the range 0-7, we want tADDi3 iff Rd was
+ // explicitly specified. From the ARM ARM: "Encoding T1 is preferred
+ // to encoding T2 if <Rd> is specified and encoding T2 is preferred
+ // to encoding T1 if <Rd> is omitted."
+ if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
+ Inst.setOpcode(ARM::tADDi3);
+ return true;
+ }
+ break;
+ case ARM::tSUBi8:
+ // If the immediate is in the range 0-7, we want tADDi3 iff Rd was
+ // explicitly specified. From the ARM ARM: "Encoding T1 is preferred
+ // to encoding T2 if <Rd> is specified and encoding T2 is preferred
+ // to encoding T1 if <Rd> is omitted."
+ if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
+ Inst.setOpcode(ARM::tSUBi3);
+ return true;
+ }
+ break;
+ case ARM::t2ADDri:
+ case ARM::t2SUBri: {
+ // If the destination and first source operand are the same, and
+ // the flags are compatible with the current IT status, use encoding T2
+ // instead of T3. For compatibility with the system 'as'. Make sure the
+ // wide encoding wasn't explicit.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ !isARMLowRegister(Inst.getOperand(0).getReg()) ||
+ (unsigned)Inst.getOperand(2).getImm() > 255 ||
+ ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
+ (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
+ ARM::tADDi8 : ARM::tSUBi8);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2ADDrr: {
+ // If the destination and first source operand are the same, and
+ // there's no setting of the flags, use encoding T2 instead of T3.
+ // Note that this is only for ADD, not SUB. This mirrors the system
+ // 'as' behaviour. Also take advantage of ADD being commutative.
+ // Make sure the wide encoding wasn't explicit.
+ bool Swap = false;
+ auto DestReg = Inst.getOperand(0).getReg();
+ bool Transform = DestReg == Inst.getOperand(1).getReg();
+ if (!Transform && DestReg == Inst.getOperand(2).getReg()) {
+ Transform = true;
+ Swap = true;
+ }
+ if (!Transform ||
+ Inst.getOperand(5).getReg() != 0 ||
+ (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDhirr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(Swap ? 1 : 2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::tADDrSP: {
+ // If the non-SP source operand and the destination operand are not the
+ // same, we need to use the 32-bit encoding if it's available.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(2).getReg()) {
+ Inst.setOpcode(ARM::t2ADDrr);
+ Inst.addOperand(MCOperand::createReg(0)); // cc_out
+ return true;
+ }
+ break;
+ }
+ case ARM::tB:
+ // A Thumb conditional branch outside of an IT block is a tBcc.
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
+ Inst.setOpcode(ARM::tBcc);
+ return true;
+ }
+ break;
+ case ARM::t2B:
+ // A Thumb2 conditional branch outside of an IT block is a t2Bcc.
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()){
+ Inst.setOpcode(ARM::t2Bcc);
+ return true;
+ }
+ break;
+ case ARM::t2Bcc:
+ // If the conditional is AL or we're in an IT block, we really want t2B.
+ if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock()) {
+ Inst.setOpcode(ARM::t2B);
+ return true;
+ }
+ break;
+ case ARM::tBcc:
+ // If the conditional is AL, we really want tB.
+ if (Inst.getOperand(1).getImm() == ARMCC::AL) {
+ Inst.setOpcode(ARM::tB);
+ return true;
+ }
+ break;
+ case ARM::tLDMIA: {
+ // If the register list contains any high registers, or if the writeback
+ // doesn't match what tLDMIA can do, we need to use the 32-bit encoding
+ // instead if we're in Thumb2. Otherwise, this should have generated
+ // an error in validateInstruction().
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool hasWritebackToken =
+ (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == "!");
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) ||
+ (!listContainsBase && !hasWritebackToken) ||
+ (listContainsBase && hasWritebackToken)) {
+ // 16-bit encoding isn't sufficient. Switch to the 32-bit version.
+ assert (isThumbTwo());
+ Inst.setOpcode(hasWritebackToken ? ARM::t2LDMIA_UPD : ARM::t2LDMIA);
+ // If we're switching to the updating version, we need to insert
+ // the writeback tied operand.
+ if (hasWritebackToken)
+ Inst.insert(Inst.begin(),
+ MCOperand::createReg(Inst.getOperand(0).getReg()));
+ return true;
+ }
+ break;
+ }
+ case ARM::tSTMIA_UPD: {
+ // If the register list contains any high registers, we need to use
+ // the 32-bit encoding instead if we're in Thumb2. Otherwise, this
+ // should have generated an error in validateInstruction().
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 4, Rn, 0, listContainsBase)) {
+ // 16-bit encoding isn't sufficient. Switch to the 32-bit version.
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2STMIA_UPD);
+ return true;
+ }
+ break;
+ }
+ case ARM::tPOP: {
+ bool listContainsBase;
+ // If the register list contains any high registers, we need to use
+ // the 32-bit encoding instead if we're in Thumb2. Otherwise, this
+ // should have generated an error in validateInstruction().
+ if (!checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase))
+ return false;
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2LDMIA_UPD);
+ // Add the base register and writeback operands.
+ Inst.insert(Inst.begin(), MCOperand::createReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::createReg(ARM::SP));
+ return true;
+ }
+ case ARM::tPUSH: {
+ bool listContainsBase;
+ if (!checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase))
+ return false;
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2STMDB_UPD);
+ // Add the base register and writeback operands.
+ Inst.insert(Inst.begin(), MCOperand::createReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::createReg(ARM::SP));
+ return true;
+ }
+ case ARM::t2MOVi: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ (unsigned)Inst.getOperand(1).getImm() <= 255 &&
+ ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
+ Inst.getOperand(4).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
+ (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
+ // The operands aren't in the same order for tMOVi8...
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ break;
+ }
+ case ARM::t2MOVr: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ Inst.getOperand(2).getImm() == ARMCC::AL &&
+ Inst.getOperand(4).getReg() == ARM::CPSR &&
+ (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
+ // The operands aren't the same for tMOV[S]r... (no cc_out)
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ break;
+ }
+ case ARM::t2SXTH:
+ case ARM::t2SXTB:
+ case ARM::t2UXTH:
+ case ARM::t2UXTB: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ Inst.getOperand(2).getImm() == 0 &&
+ (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Illegal opcode!");
+ case ARM::t2SXTH: NewOpc = ARM::tSXTH; break;
+ case ARM::t2SXTB: NewOpc = ARM::tSXTB; break;
+ case ARM::t2UXTH: NewOpc = ARM::tUXTH; break;
+ case ARM::t2UXTB: NewOpc = ARM::tUXTB; break;
+ }
+ // The operands aren't the same for thumb1 (no rotate operand).
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ break;
+ }
+ case ARM::MOVsi: {
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
+ // rrx shifts and asr/lsr of #32 is encoded as 0
+ if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr)
+ return false;
+ if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) {
+ // Shifting by zero is accepted as a vanilla 'MOVr'
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::ANDrsi:
+ case ARM::ORRrsi:
+ case ARM::EORrsi:
+ case ARM::BICrsi:
+ case ARM::SUBrsi:
+ case ARM::ADDrsi: {
+ unsigned newOpc;
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(3).getImm());
+ if (SOpc == ARM_AM::rrx) return false;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ANDrsi: newOpc = ARM::ANDrr; break;
+ case ARM::ORRrsi: newOpc = ARM::ORRrr; break;
+ case ARM::EORrsi: newOpc = ARM::EORrr; break;
+ case ARM::BICrsi: newOpc = ARM::BICrr; break;
+ case ARM::SUBrsi: newOpc = ARM::SUBrr; break;
+ case ARM::ADDrsi: newOpc = ARM::ADDrr; break;
+ }
+ // If the shift is by zero, use the non-shifted instruction definition.
+ // The exception is for right shifts, where 0 == 32
+ if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0 &&
+ !(SOpc == ARM_AM::lsr || SOpc == ARM_AM::asr)) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::ITasm:
+ case ARM::t2IT: {
+ // The mask bits for all but the first condition are represented as
+ // the low bit of the condition code value implies 't'. We currently
+ // always have 1 implies 't', so XOR toggle the bits if the low bit
+ // of the condition code is zero.
+ MCOperand &MO = Inst.getOperand(1);
+ unsigned Mask = MO.getImm();
+ unsigned OrigMask = Mask;
+ unsigned TZ = countTrailingZeros(Mask);
+ if ((Inst.getOperand(0).getImm() & 1) == 0) {
+ assert(Mask && TZ <= 3 && "illegal IT mask value!");
+ Mask ^= (0xE << TZ) & 0xF;
+ }
+ MO.setImm(Mask);
+
+ // Set up the IT block state according to the IT instruction we just
+ // matched.
+ assert(!inITBlock() && "nested IT blocks?!");
+ ITState.Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm());
+ ITState.Mask = OrigMask; // Use the original mask, not the updated one.
+ ITState.CurPosition = 0;
+ ITState.FirstCond = true;
+ break;
+ }
+ case ARM::t2LSLrr:
+ case ARM::t2LSRrr:
+ case ARM::t2ASRrr:
+ case ARM::t2SBCrr:
+ case ARM::t2RORrr:
+ case ARM::t2BICrr:
+ {
+ // Assemblers should use the narrow encodings of these instructions when permissible.
+ if ((isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg())) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+ (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
+ !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
+ ".w"))) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2LSLrr: NewOpc = ARM::tLSLrr; break;
+ case ARM::t2LSRrr: NewOpc = ARM::tLSRrr; break;
+ case ARM::t2ASRrr: NewOpc = ARM::tASRrr; break;
+ case ARM::t2SBCrr: NewOpc = ARM::tSBC; break;
+ case ARM::t2RORrr: NewOpc = ARM::tROR; break;
+ case ARM::t2BICrr: NewOpc = ARM::tBIC; break;
+ }
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::t2ANDrr:
+ case ARM::t2EORrr:
+ case ARM::t2ADCrr:
+ case ARM::t2ORRrr:
+ {
+ // Assemblers should use the narrow encodings of these instructions when permissible.
+ // These instructions are special in that they are commutable, so shorter encodings
+ // are available more often.
+ if ((isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg())) &&
+ (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() ||
+ Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) &&
+ ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+ (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
+ !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
+ ".w"))) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2ADCrr: NewOpc = ARM::tADC; break;
+ case ARM::t2ANDrr: NewOpc = ARM::tAND; break;
+ case ARM::t2EORrr: NewOpc = ARM::tEOR; break;
+ case ARM::t2ORRrr: NewOpc = ARM::tORR; break;
+ }
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) {
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ } else {
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(1));
+ }
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ }
+ return false;
+}
+
+unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+ // 16-bit thumb arithmetic instructions either require or preclude the 'S'
+ // suffix depending on whether they're in an IT block or not.
+ unsigned Opc = Inst.getOpcode();
+ const MCInstrDesc &MCID = MII.get(Opc);
+ if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
+ assert(MCID.hasOptionalDef() &&
+ "optionally flag setting instruction missing optional def operand");
+ assert(MCID.NumOperands == Inst.getNumOperands() &&
+ "operand count mismatch!");
+ // Find the optional-def operand (cc_out).
+ unsigned OpNo;
+ for (OpNo = 0;
+ !MCID.OpInfo[OpNo].isOptionalDef() && OpNo < MCID.NumOperands;
+ ++OpNo)
+ ;
+ // If we're parsing Thumb1, reject it completely.
+ if (isThumbOne() && Inst.getOperand(OpNo).getReg() != ARM::CPSR)
+ return Match_MnemonicFail;
+ // If we're parsing Thumb2, which form is legal depends on whether we're
+ // in an IT block.
+ if (isThumbTwo() && Inst.getOperand(OpNo).getReg() != ARM::CPSR &&
+ !inITBlock())
+ return Match_RequiresITBlock;
+ if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR &&
+ inITBlock())
+ return Match_RequiresNotITBlock;
+ } else if (isThumbOne()) {
+ // Some high-register supporting Thumb1 encodings only allow both registers
+ // to be from r0-r7 when in Thumb2.
+ if (Opc == ARM::tADDhirr && !hasV6MOps() &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()))
+ return Match_RequiresThumb2;
+ // Others only require ARMv6 or later.
+ else if (Opc == ARM::tMOVr && !hasV6Ops() &&
+ isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()))
+ return Match_RequiresV6;
+ }
+
+ for (unsigned I = 0; I < MCID.NumOperands; ++I)
+ if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) {
+ // rGPRRegClass excludes PC, and also excluded SP before ARMv8
+ if ((Inst.getOperand(I).getReg() == ARM::SP) && !hasV8Ops())
+ return Match_RequiresV8;
+ else if (Inst.getOperand(I).getReg() == ARM::PC)
+ return Match_InvalidOperand;
+ }
+
+ return Match_Success;
+}
+
+namespace llvm {
+template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
+ return true; // In an assembly source, no need to second-guess
+}
+}
+
+static const char *getSubtargetFeatureName(uint64_t Val);
+bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult;
+
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
+ switch (MatchResult) {
+ case Match_Success:
+ // Context sensitive operand constraints aren't handled by the matcher,
+ // so check them here.
+ if (validateInstruction(Inst, Operands)) {
+ // Still progress the IT block, otherwise one wrong condition causes
+ // nasty cascading errors.
+ forwardITPosition();
+ return true;
+ }
+
+ { // processInstruction() updates inITBlock state, we need to save it away
+ bool wasInITBlock = inITBlock();
+
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other. E.g.,
+ // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
+ while (processInstruction(Inst, Operands, Out))
+ ;
+
+ // Only after the instruction is fully processed, we can validate it
+ if (wasInITBlock && hasV8Ops() && isThumb() &&
+ !isV8EligibleForIT(&Inst)) {
+ Warning(IDLoc, "deprecated instruction in IT block");
+ }
+ }
+
+ // Only move forward at the very end so that everything in validate
+ // and process gets a consistent answer about whether we're in an IT
+ // block.
+ forwardITPosition();
+
+ // ITasm is an ARM mode pseudo-instruction that just sets the ITblock and
+ // doesn't actually encode.
+ if (Inst.getOpcode() == ARM::ITasm)
+ return false;
+
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, getSTI());
+ return false;
+ case Match_MissingFeature: {
+ assert(ErrorInfo && "Unknown missing feature!");
+ // Special case the error message for the very common case where only
+ // a single subtarget feature is missing (Thumb vs. ARM, e.g.).
+ std::string Msg = "instruction requires:";
+ uint64_t Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+ if (ErrorInfo & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg);
+ }
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction",
+ ((ARMOperand &)*Operands[0]).getLocRange());
+ case Match_RequiresNotITBlock:
+ return Error(IDLoc, "flag setting instruction only valid outside IT block");
+ case Match_RequiresITBlock:
+ return Error(IDLoc, "instruction only valid inside IT block");
+ case Match_RequiresV6:
+ return Error(IDLoc, "instruction variant requires ARMv6 or later");
+ case Match_RequiresThumb2:
+ return Error(IDLoc, "instruction variant requires Thumb2");
+ case Match_RequiresV8:
+ return Error(IDLoc, "instruction variant requires ARMv8 or later");
+ case Match_ImmRange0_15: {
+ SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
+ }
+ case Match_ImmRange0_239: {
+ SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ return Error(ErrorLoc, "immediate operand must be in the range [0,239]");
+ }
+ case Match_AlignedMemoryRequiresNone:
+ case Match_DupAlignedMemoryRequiresNone:
+ case Match_AlignedMemoryRequires16:
+ case Match_DupAlignedMemoryRequires16:
+ case Match_AlignedMemoryRequires32:
+ case Match_DupAlignedMemoryRequires32:
+ case Match_AlignedMemoryRequires64:
+ case Match_DupAlignedMemoryRequires64:
+ case Match_AlignedMemoryRequires64or128:
+ case Match_DupAlignedMemoryRequires64or128:
+ case Match_AlignedMemoryRequires64or128or256:
+ {
+ SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getAlignmentLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ switch (MatchResult) {
+ default:
+ llvm_unreachable("Missing Match_Aligned type");
+ case Match_AlignedMemoryRequiresNone:
+ case Match_DupAlignedMemoryRequiresNone:
+ return Error(ErrorLoc, "alignment must be omitted");
+ case Match_AlignedMemoryRequires16:
+ case Match_DupAlignedMemoryRequires16:
+ return Error(ErrorLoc, "alignment must be 16 or omitted");
+ case Match_AlignedMemoryRequires32:
+ case Match_DupAlignedMemoryRequires32:
+ return Error(ErrorLoc, "alignment must be 32 or omitted");
+ case Match_AlignedMemoryRequires64:
+ case Match_DupAlignedMemoryRequires64:
+ return Error(ErrorLoc, "alignment must be 64 or omitted");
+ case Match_AlignedMemoryRequires64or128:
+ case Match_DupAlignedMemoryRequires64or128:
+ return Error(ErrorLoc, "alignment must be 64, 128 or omitted");
+ case Match_AlignedMemoryRequires64or128or256:
+ return Error(ErrorLoc, "alignment must be 64, 128, 256 or omitted");
+ }
+ }
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+}
+
+/// parseDirective parses the arm specific directives
+bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
+ const MCObjectFileInfo::Environment Format =
+ getContext().getObjectFileInfo()->getObjectFileType();
+ bool IsMachO = Format == MCObjectFileInfo::IsMachO;
+ bool IsCOFF = Format == MCObjectFileInfo::IsCOFF;
+
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return parseLiteralValues(4, DirectiveID.getLoc());
+ else if (IDVal == ".short" || IDVal == ".hword")
+ return parseLiteralValues(2, DirectiveID.getLoc());
+ else if (IDVal == ".thumb")
+ return parseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".arm")
+ return parseDirectiveARM(DirectiveID.getLoc());
+ else if (IDVal == ".thumb_func")
+ return parseDirectiveThumbFunc(DirectiveID.getLoc());
+ else if (IDVal == ".code")
+ return parseDirectiveCode(DirectiveID.getLoc());
+ else if (IDVal == ".syntax")
+ return parseDirectiveSyntax(DirectiveID.getLoc());
+ else if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
+ else if (IDVal == ".fnend")
+ return parseDirectiveFnEnd(DirectiveID.getLoc());
+ else if (IDVal == ".cantunwind")
+ return parseDirectiveCantUnwind(DirectiveID.getLoc());
+ else if (IDVal == ".personality")
+ return parseDirectivePersonality(DirectiveID.getLoc());
+ else if (IDVal == ".handlerdata")
+ return parseDirectiveHandlerData(DirectiveID.getLoc());
+ else if (IDVal == ".setfp")
+ return parseDirectiveSetFP(DirectiveID.getLoc());
+ else if (IDVal == ".pad")
+ return parseDirectivePad(DirectiveID.getLoc());
+ else if (IDVal == ".save")
+ return parseDirectiveRegSave(DirectiveID.getLoc(), false);
+ else if (IDVal == ".vsave")
+ return parseDirectiveRegSave(DirectiveID.getLoc(), true);
+ else if (IDVal == ".ltorg" || IDVal == ".pool")
+ return parseDirectiveLtorg(DirectiveID.getLoc());
+ else if (IDVal == ".even")
+ return parseDirectiveEven(DirectiveID.getLoc());
+ else if (IDVal == ".personalityindex")
+ return parseDirectivePersonalityIndex(DirectiveID.getLoc());
+ else if (IDVal == ".unwind_raw")
+ return parseDirectiveUnwindRaw(DirectiveID.getLoc());
+ else if (IDVal == ".movsp")
+ return parseDirectiveMovSP(DirectiveID.getLoc());
+ else if (IDVal == ".arch_extension")
+ return parseDirectiveArchExtension(DirectiveID.getLoc());
+ else if (IDVal == ".align")
+ return parseDirectiveAlign(DirectiveID.getLoc());
+ else if (IDVal == ".thumb_set")
+ return parseDirectiveThumbSet(DirectiveID.getLoc());
+
+ if (!IsMachO && !IsCOFF) {
+ if (IDVal == ".arch")
+ return parseDirectiveArch(DirectiveID.getLoc());
+ else if (IDVal == ".cpu")
+ return parseDirectiveCPU(DirectiveID.getLoc());
+ else if (IDVal == ".eabi_attribute")
+ return parseDirectiveEabiAttr(DirectiveID.getLoc());
+ else if (IDVal == ".fpu")
+ return parseDirectiveFPU(DirectiveID.getLoc());
+ else if (IDVal == ".fnstart")
+ return parseDirectiveFnStart(DirectiveID.getLoc());
+ else if (IDVal == ".inst")
+ return parseDirectiveInst(DirectiveID.getLoc());
+ else if (IDVal == ".inst.n")
+ return parseDirectiveInst(DirectiveID.getLoc(), 'n');
+ else if (IDVal == ".inst.w")
+ return parseDirectiveInst(DirectiveID.getLoc(), 'w');
+ else if (IDVal == ".object_arch")
+ return parseDirectiveObjectArch(DirectiveID.getLoc());
+ else if (IDVal == ".tlsdescseq")
+ return parseDirectiveTLSDescSeq(DirectiveID.getLoc());
+ }
+
+ return true;
+}
+
+/// parseLiteralValues
+/// ::= .hword expression [, expression]*
+/// ::= .short expression [, expression]*
+/// ::= .word expression [, expression]*
+bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value)) {
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ getParser().getStreamer().EmitValue(Value, Size, L);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// parseDirectiveThumb
+/// ::= .thumb
+bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ Parser.Lex();
+
+ if (!hasThumb()) {
+ Error(L, "target does not support Thumb mode");
+ return false;
+ }
+
+ if (!isThumb())
+ SwitchMode();
+
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ return false;
+}
+
+/// parseDirectiveARM
+/// ::= .arm
+bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ Parser.Lex();
+
+ if (!hasARM()) {
+ Error(L, "target does not support ARM mode");
+ return false;
+ }
+
+ if (isThumb())
+ SwitchMode();
+
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ return false;
+}
+
+void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
+ if (NextSymbolIsThumb) {
+ getParser().getStreamer().EmitThumbFunc(Symbol);
+ NextSymbolIsThumb = false;
+ }
+}
+
+/// parseDirectiveThumbFunc
+/// ::= .thumbfunc symbol_name
+bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ const auto Format = getContext().getObjectFileInfo()->getObjectFileType();
+ bool IsMachO = Format == MCObjectFileInfo::IsMachO;
+
+ // Darwin asm has (optionally) function name after .thumb_func direction
+ // ELF doesn't
+ if (IsMachO) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::EndOfStatement)) {
+ if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) {
+ Error(L, "unexpected token in .thumb_func directive");
+ return false;
+ }
+
+ MCSymbol *Func =
+ getParser().getContext().getOrCreateSymbol(Tok.getIdentifier());
+ getParser().getStreamer().EmitThumbFunc(Func);
+ Parser.Lex(); // Consume the identifier token.
+ return false;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ NextSymbolIsThumb = true;
+ return false;
+}
+
+/// parseDirectiveSyntax
+/// ::= .syntax unified | divided
+bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(L, "unexpected token in .syntax directive");
+ return false;
+ }
+
+ StringRef Mode = Tok.getString();
+ if (Mode == "unified" || Mode == "UNIFIED") {
+ Parser.Lex();
+ } else if (Mode == "divided" || Mode == "DIVIDED") {
+ Error(L, "'.syntax divided' arm asssembly not supported");
+ return false;
+ } else {
+ Error(L, "unrecognized syntax mode in .syntax directive");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ return false;
+ }
+ Parser.Lex();
+
+ // TODO tell the MC streamer the mode
+ // getParser().getStreamer().Emit???();
+ return false;
+}
+
+/// parseDirectiveCode
+/// ::= .code 16 | 32
+bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Integer)) {
+ Error(L, "unexpected token in .code directive");
+ return false;
+ }
+ int64_t Val = Parser.getTok().getIntVal();
+ if (Val != 16 && Val != 32) {
+ Error(L, "invalid operand to .code directive");
+ return false;
+ }
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ return false;
+ }
+ Parser.Lex();
+
+ if (Val == 16) {
+ if (!hasThumb()) {
+ Error(L, "target does not support Thumb mode");
+ return false;
+ }
+
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ } else {
+ if (!hasARM()) {
+ Error(L, "target does not support ARM mode");
+ return false;
+ }
+
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ }
+
+ return false;
+}
+
+/// parseDirectiveReq
+/// ::= name .req registername
+bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex(); // Eat the '.req' token.
+ unsigned Reg;
+ SMLoc SRegLoc, ERegLoc;
+ if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
+ Parser.eatToEndOfStatement();
+ Error(SRegLoc, "register name expected");
+ return false;
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Parser.eatToEndOfStatement();
+ Error(Parser.getTok().getLoc(), "unexpected input in .req directive.");
+ return false;
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg) {
+ Error(SRegLoc, "redefinition of '" + Name + "' does not match original.");
+ return false;
+ }
+
+ return false;
+}
+
+/// parseDirectiveUneq
+/// ::= .unreq registername
+bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.eatToEndOfStatement();
+ Error(L, "unexpected input in .unreq directive.");
+ return false;
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
+/// parseDirectiveArch
+/// ::= .arch token
+bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
+ StringRef Arch = getParser().parseStringToEndOfStatement().trim();
+
+ unsigned ID = ARM::parseArch(Arch);
+
+ if (ID == ARM::AK_INVALID) {
+ Error(L, "Unknown arch name");
+ return false;
+ }
+
+ Triple T;
+ MCSubtargetInfo &STI = copySTI();
+ STI.setDefaultFeatures("", ("+" + ARM::getArchName(ID)).str());
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ getTargetStreamer().emitArch(ID);
+ return false;
+}
+
+/// parseDirectiveEabiAttr
+/// ::= .eabi_attribute int, int [, "str"]
+/// ::= .eabi_attribute Tag_name, int [, "str"]
+bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ int64_t Tag;
+ SMLoc TagLoc;
+ TagLoc = Parser.getTok().getLoc();
+ if (Parser.getTok().is(AsmToken::Identifier)) {
+ StringRef Name = Parser.getTok().getIdentifier();
+ Tag = ARMBuildAttrs::AttrTypeFromString(Name);
+ if (Tag == -1) {
+ Error(TagLoc, "attribute name not recognised: " + Name);
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ Parser.Lex();
+ } else {
+ const MCExpr *AttrExpr;
+
+ TagLoc = Parser.getTok().getLoc();
+ if (Parser.parseExpression(AttrExpr)) {
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(AttrExpr);
+ if (!CE) {
+ Error(TagLoc, "expected numeric constant");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ Tag = CE->getValue();
+ }
+
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "comma expected");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ Parser.Lex(); // skip comma
+
+ StringRef StringValue = "";
+ bool IsStringValue = false;
+
+ int64_t IntegerValue = 0;
+ bool IsIntegerValue = false;
+
+ if (Tag == ARMBuildAttrs::CPU_raw_name || Tag == ARMBuildAttrs::CPU_name)
+ IsStringValue = true;
+ else if (Tag == ARMBuildAttrs::compatibility) {
+ IsStringValue = true;
+ IsIntegerValue = true;
+ } else if (Tag < 32 || Tag % 2 == 0)
+ IsIntegerValue = true;
+ else if (Tag % 2 == 1)
+ IsStringValue = true;
+ else
+ llvm_unreachable("invalid tag type");
+
+ if (IsIntegerValue) {
+ const MCExpr *ValueExpr;
+ SMLoc ValueExprLoc = Parser.getTok().getLoc();
+ if (Parser.parseExpression(ValueExpr)) {
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ValueExpr);
+ if (!CE) {
+ Error(ValueExprLoc, "expected numeric constant");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ IntegerValue = CE->getValue();
+ }
+
+ if (Tag == ARMBuildAttrs::compatibility) {
+ if (Parser.getTok().isNot(AsmToken::Comma))
+ IsStringValue = false;
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "comma expected");
+ Parser.eatToEndOfStatement();
+ return false;
+ } else {
+ Parser.Lex();
+ }
+ }
+
+ if (IsStringValue) {
+ if (Parser.getTok().isNot(AsmToken::String)) {
+ Error(Parser.getTok().getLoc(), "bad string constant");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ StringValue = Parser.getTok().getStringContents();
+ Parser.Lex();
+ }
+
+ if (IsIntegerValue && IsStringValue) {
+ assert(Tag == ARMBuildAttrs::compatibility);
+ getTargetStreamer().emitIntTextAttribute(Tag, IntegerValue, StringValue);
+ } else if (IsIntegerValue)
+ getTargetStreamer().emitAttribute(Tag, IntegerValue);
+ else if (IsStringValue)
+ getTargetStreamer().emitTextAttribute(Tag, StringValue);
+ return false;
+}
+
+/// parseDirectiveCPU
+/// ::= .cpu str
+bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
+ StringRef CPU = getParser().parseStringToEndOfStatement().trim();
+ getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU);
+
+ // FIXME: This is using table-gen data, but should be moved to
+ // ARMTargetParser once that is table-gen'd.
+ if (!getSTI().isCPUStringValid(CPU)) {
+ Error(L, "Unknown CPU name");
+ return false;
+ }
+
+ MCSubtargetInfo &STI = copySTI();
+ STI.setDefaultFeatures(CPU, "");
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ return false;
+}
+/// parseDirectiveFPU
+/// ::= .fpu str
+bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
+ SMLoc FPUNameLoc = getTok().getLoc();
+ StringRef FPU = getParser().parseStringToEndOfStatement().trim();
+
+ unsigned ID = ARM::parseFPU(FPU);
+ std::vector<const char *> Features;
+ if (!ARM::getFPUFeatures(ID, Features)) {
+ Error(FPUNameLoc, "Unknown FPU name");
+ return false;
+ }
+
+ MCSubtargetInfo &STI = copySTI();
+ for (auto Feature : Features)
+ STI.ApplyFeatureFlag(Feature);
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ getTargetStreamer().emitFPU(ID);
+ return false;
+}
+
+/// parseDirectiveFnStart
+/// ::= .fnstart
+bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
+ if (UC.hasFnStart()) {
+ Error(L, ".fnstart starts before the end of previous one");
+ UC.emitFnStartLocNotes();
+ return false;
+ }
+
+ // Reset the unwind directives parser state
+ UC.reset();
+
+ getTargetStreamer().emitFnStart();
+
+ UC.recordFnStart(L);
+ return false;
+}
+
+/// parseDirectiveFnEnd
+/// ::= .fnend
+bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
+ // Check the ordering of unwind directives
+ if (!UC.hasFnStart()) {
+ Error(L, ".fnstart must precede .fnend directive");
+ return false;
+ }
+
+ // Reset the unwind directives parser state
+ getTargetStreamer().emitFnEnd();
+
+ UC.reset();
+ return false;
+}
+
+/// parseDirectiveCantUnwind
+/// ::= .cantunwind
+bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) {
+ UC.recordCantUnwind(L);
+
+ // Check the ordering of unwind directives
+ if (!UC.hasFnStart()) {
+ Error(L, ".fnstart must precede .cantunwind directive");
+ return false;
+ }
+ if (UC.hasHandlerData()) {
+ Error(L, ".cantunwind can't be used with .handlerdata directive");
+ UC.emitHandlerDataLocNotes();
+ return false;
+ }
+ if (UC.hasPersonality()) {
+ Error(L, ".cantunwind can't be used with .personality directive");
+ UC.emitPersonalityLocNotes();
+ return false;
+ }
+
+ getTargetStreamer().emitCantUnwind();
+ return false;
+}
+
+/// parseDirectivePersonality
+/// ::= .personality name
+bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ bool HasExistingPersonality = UC.hasPersonality();
+
+ UC.recordPersonality(L);
+
+ // Check the ordering of unwind directives
+ if (!UC.hasFnStart()) {
+ Error(L, ".fnstart must precede .personality directive");
+ return false;
+ }
+ if (UC.cantUnwind()) {
+ Error(L, ".personality can't be used with .cantunwind directive");
+ UC.emitCantUnwindLocNotes();
+ return false;
+ }
+ if (UC.hasHandlerData()) {
+ Error(L, ".personality must precede .handlerdata directive");
+ UC.emitHandlerDataLocNotes();
+ return false;
+ }
+ if (HasExistingPersonality) {
+ Parser.eatToEndOfStatement();
+ Error(L, "multiple personality directives");
+ UC.emitPersonalityLocNotes();
+ return false;
+ }
+
+ // Parse the name of the personality routine
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.eatToEndOfStatement();
+ Error(L, "unexpected input in .personality directive.");
+ return false;
+ }
+ StringRef Name(Parser.getTok().getIdentifier());
+ Parser.Lex();
+
+ MCSymbol *PR = getParser().getContext().getOrCreateSymbol(Name);
+ getTargetStreamer().emitPersonality(PR);
+ return false;
+}
+
+/// parseDirectiveHandlerData
+/// ::= .handlerdata
+bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
+ UC.recordHandlerData(L);
+
+ // Check the ordering of unwind directives
+ if (!UC.hasFnStart()) {
+ Error(L, ".fnstart must precede .personality directive");
+ return false;
+ }
+ if (UC.cantUnwind()) {
+ Error(L, ".handlerdata can't be used with .cantunwind directive");
+ UC.emitCantUnwindLocNotes();
+ return false;
+ }
+
+ getTargetStreamer().emitHandlerData();
+ return false;
+}
+
+/// parseDirectiveSetFP
+/// ::= .setfp fpreg, spreg [, offset]
+bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ // Check the ordering of unwind directives
+ if (!UC.hasFnStart()) {
+ Error(L, ".fnstart must precede .setfp directive");
+ return false;
+ }
+ if (UC.hasHandlerData()) {
+ Error(L, ".setfp must precede .handlerdata directive");
+ return false;
+ }
+
+ // Parse fpreg
+ SMLoc FPRegLoc = Parser.getTok().getLoc();
+ int FPReg = tryParseRegister();
+ if (FPReg == -1) {
+ Error(FPRegLoc, "frame pointer register expected");
+ return false;
+ }
+
+ // Consume comma
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "comma expected");
+ return false;
+ }
+ Parser.Lex(); // skip comma
+
+ // Parse spreg
+ SMLoc SPRegLoc = Parser.getTok().getLoc();
+ int SPReg = tryParseRegister();
+ if (SPReg == -1) {
+ Error(SPRegLoc, "stack pointer register expected");
+ return false;
+ }
+
+ if (SPReg != ARM::SP && SPReg != UC.getFPReg()) {
+ Error(SPRegLoc, "register should be either $sp or the latest fp register");
+ return false;
+ }
+
+ // Update the frame pointer register
+ UC.saveFPReg(FPReg);
+
+ // Parse offset
+ int64_t Offset = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // skip comma
+
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return false;
+ }
+ Parser.Lex(); // skip hash token.
+
+ const MCExpr *OffsetExpr;
+ SMLoc ExLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ if (getParser().parseExpression(OffsetExpr, EndLoc)) {
+ Error(ExLoc, "malformed setfp offset");
+ return false;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
+ if (!CE) {
+ Error(ExLoc, "setfp offset must be an immediate");
+ return false;
+ }
+
+ Offset = CE->getValue();
+ }
+
+ getTargetStreamer().emitSetFP(static_cast<unsigned>(FPReg),
+ static_cast<unsigned>(SPReg), Offset);
+ return false;
+}
+
+/// parseDirective
+/// ::= .pad offset
+bool ARMAsmParser::parseDirectivePad(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ // Check the ordering of unwind directives
+ if (!UC.hasFnStart()) {
+ Error(L, ".fnstart must precede .pad directive");
+ return false;
+ }
+ if (UC.hasHandlerData()) {
+ Error(L, ".pad must precede .handlerdata directive");
+ return false;
+ }
+
+ // Parse the offset
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return false;
+ }
+ Parser.Lex(); // skip hash token.
+
+ const MCExpr *OffsetExpr;
+ SMLoc ExLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ if (getParser().parseExpression(OffsetExpr, EndLoc)) {
+ Error(ExLoc, "malformed pad offset");
+ return false;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
+ if (!CE) {
+ Error(ExLoc, "pad offset must be an immediate");
+ return false;
+ }
+
+ getTargetStreamer().emitPad(CE->getValue());
+ return false;
+}
+
+/// parseDirectiveRegSave
+/// ::= .save { registers }
+/// ::= .vsave { registers }
+bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
+ // Check the ordering of unwind directives
+ if (!UC.hasFnStart()) {
+ Error(L, ".fnstart must precede .save or .vsave directives");
+ return false;
+ }
+ if (UC.hasHandlerData()) {
+ Error(L, ".save or .vsave must precede .handlerdata directive");
+ return false;
+ }
+
+ // RAII object to make sure parsed operands are deleted.
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
+
+ // Parse the register list
+ if (parseRegisterList(Operands))
+ return false;
+ ARMOperand &Op = (ARMOperand &)*Operands[0];
+ if (!IsVector && !Op.isRegList()) {
+ Error(L, ".save expects GPR registers");
+ return false;
+ }
+ if (IsVector && !Op.isDPRRegList()) {
+ Error(L, ".vsave expects DPR registers");
+ return false;
+ }
+
+ getTargetStreamer().emitRegSave(Op.getRegList(), IsVector);
+ return false;
+}
+
+/// parseDirectiveInst
+/// ::= .inst opcode [, ...]
+/// ::= .inst.n opcode [, ...]
+/// ::= .inst.w opcode [, ...]
+bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
+ MCAsmParser &Parser = getParser();
+ int Width;
+
+ if (isThumb()) {
+ switch (Suffix) {
+ case 'n':
+ Width = 2;
+ break;
+ case 'w':
+ Width = 4;
+ break;
+ default:
+ Parser.eatToEndOfStatement();
+ Error(Loc, "cannot determine Thumb instruction size, "
+ "use inst.n/inst.w instead");
+ return false;
+ }
+ } else {
+ if (Suffix) {
+ Parser.eatToEndOfStatement();
+ Error(Loc, "width suffixes are invalid in ARM mode");
+ return false;
+ }
+ Width = 4;
+ }
+
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ Parser.eatToEndOfStatement();
+ Error(Loc, "expected expression following directive");
+ return false;
+ }
+
+ for (;;) {
+ const MCExpr *Expr;
+
+ if (getParser().parseExpression(Expr)) {
+ Error(Loc, "expected expression");
+ return false;
+ }
+
+ const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
+ if (!Value) {
+ Error(Loc, "expected constant expression");
+ return false;
+ }
+
+ switch (Width) {
+ case 2:
+ if (Value->getValue() > 0xffff) {
+ Error(Loc, "inst.n operand is too big, use inst.w instead");
+ return false;
+ }
+ break;
+ case 4:
+ if (Value->getValue() > 0xffffffff) {
+ Error(Loc,
+ StringRef(Suffix ? "inst.w" : "inst") + " operand is too big");
+ return false;
+ }
+ break;
+ default:
+ llvm_unreachable("only supported widths are 2 and 4");
+ }
+
+ getTargetStreamer().emitInst(Value->getValue(), Suffix);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(Loc, "unexpected token in directive");
+ return false;
+ }
+
+ Parser.Lex();
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// parseDirectiveLtorg
+/// ::= .ltorg | .pool
+bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) {
+ getTargetStreamer().emitCurrentConstantPool();
+ return false;
+}
+
+bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
+ const MCSection *Section = getStreamer().getCurrentSection().first;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ TokError("unexpected token in directive");
+ return false;
+ }
+
+ if (!Section) {
+ getStreamer().InitSections(false);
+ Section = getStreamer().getCurrentSection().first;
+ }
+
+ assert(Section && "must have section to emit alignment");
+ if (Section->UseCodeAlign())
+ getStreamer().EmitCodeAlignment(2);
+ else
+ getStreamer().EmitValueToAlignment(2);
+
+ return false;
+}
+
+/// parseDirectivePersonalityIndex
+/// ::= .personalityindex index
+bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ bool HasExistingPersonality = UC.hasPersonality();
+
+ UC.recordPersonalityIndex(L);
+
+ if (!UC.hasFnStart()) {
+ Parser.eatToEndOfStatement();
+ Error(L, ".fnstart must precede .personalityindex directive");
+ return false;
+ }
+ if (UC.cantUnwind()) {
+ Parser.eatToEndOfStatement();
+ Error(L, ".personalityindex cannot be used with .cantunwind");
+ UC.emitCantUnwindLocNotes();
+ return false;
+ }
+ if (UC.hasHandlerData()) {
+ Parser.eatToEndOfStatement();
+ Error(L, ".personalityindex must precede .handlerdata directive");
+ UC.emitHandlerDataLocNotes();
+ return false;
+ }
+ if (HasExistingPersonality) {
+ Parser.eatToEndOfStatement();
+ Error(L, "multiple personality directives");
+ UC.emitPersonalityLocNotes();
+ return false;
+ }
+
+ const MCExpr *IndexExpression;
+ SMLoc IndexLoc = Parser.getTok().getLoc();
+ if (Parser.parseExpression(IndexExpression)) {
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(IndexExpression);
+ if (!CE) {
+ Parser.eatToEndOfStatement();
+ Error(IndexLoc, "index must be a constant number");
+ return false;
+ }
+ if (CE->getValue() < 0 ||
+ CE->getValue() >= ARM::EHABI::NUM_PERSONALITY_INDEX) {
+ Parser.eatToEndOfStatement();
+ Error(IndexLoc, "personality routine index should be in range [0-3]");
+ return false;
+ }
+
+ getTargetStreamer().emitPersonalityIndex(CE->getValue());
+ return false;
+}
+
+/// parseDirectiveUnwindRaw
+/// ::= .unwind_raw offset, opcode [, opcode...]
+bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ if (!UC.hasFnStart()) {
+ Parser.eatToEndOfStatement();
+ Error(L, ".fnstart must precede .unwind_raw directives");
+ return false;
+ }
+
+ int64_t StackOffset;
+
+ const MCExpr *OffsetExpr;
+ SMLoc OffsetLoc = getLexer().getLoc();
+ if (getLexer().is(AsmToken::EndOfStatement) ||
+ getParser().parseExpression(OffsetExpr)) {
+ Error(OffsetLoc, "expected expression");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
+ if (!CE) {
+ Error(OffsetLoc, "offset must be a constant");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ StackOffset = CE->getValue();
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(getLexer().getLoc(), "expected comma");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ Parser.Lex();
+
+ SmallVector<uint8_t, 16> Opcodes;
+ for (;;) {
+ const MCExpr *OE;
+
+ SMLoc OpcodeLoc = getLexer().getLoc();
+ if (getLexer().is(AsmToken::EndOfStatement) || Parser.parseExpression(OE)) {
+ Error(OpcodeLoc, "expected opcode expression");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ const MCConstantExpr *OC = dyn_cast<MCConstantExpr>(OE);
+ if (!OC) {
+ Error(OpcodeLoc, "opcode value must be a constant");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ const int64_t Opcode = OC->getValue();
+ if (Opcode & ~0xff) {
+ Error(OpcodeLoc, "invalid opcode");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ Opcodes.push_back(uint8_t(Opcode));
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(getLexer().getLoc(), "unexpected token in directive");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ Parser.Lex();
+ }
+
+ getTargetStreamer().emitUnwindRaw(StackOffset, Opcodes);
+
+ Parser.Lex();
+ return false;
+}
+
+/// parseDirectiveTLSDescSeq
+/// ::= .tlsdescseq tls-variable
+bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ TokError("expected variable after '.tlsdescseq' directive");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ const MCSymbolRefExpr *SRE =
+ MCSymbolRefExpr::create(Parser.getTok().getIdentifier(),
+ MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext());
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(Parser.getTok().getLoc(), "unexpected token");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ getTargetStreamer().AnnotateTLSDescriptorSequence(SRE);
+ return false;
+}
+
+/// parseDirectiveMovSP
+/// ::= .movsp reg [, #offset]
+bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ if (!UC.hasFnStart()) {
+ Parser.eatToEndOfStatement();
+ Error(L, ".fnstart must precede .movsp directives");
+ return false;
+ }
+ if (UC.getFPReg() != ARM::SP) {
+ Parser.eatToEndOfStatement();
+ Error(L, "unexpected .movsp directive");
+ return false;
+ }
+
+ SMLoc SPRegLoc = Parser.getTok().getLoc();
+ int SPReg = tryParseRegister();
+ if (SPReg == -1) {
+ Parser.eatToEndOfStatement();
+ Error(SPRegLoc, "register expected");
+ return false;
+ }
+
+ if (SPReg == ARM::SP || SPReg == ARM::PC) {
+ Parser.eatToEndOfStatement();
+ Error(SPRegLoc, "sp and pc are not permitted in .movsp directive");
+ return false;
+ }
+
+ int64_t Offset = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "expected #constant");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ Parser.Lex();
+
+ const MCExpr *OffsetExpr;
+ SMLoc OffsetLoc = Parser.getTok().getLoc();
+ if (Parser.parseExpression(OffsetExpr)) {
+ Parser.eatToEndOfStatement();
+ Error(OffsetLoc, "malformed offset expression");
+ return false;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
+ if (!CE) {
+ Parser.eatToEndOfStatement();
+ Error(OffsetLoc, "offset must be an immediate constant");
+ return false;
+ }
+
+ Offset = CE->getValue();
+ }
+
+ getTargetStreamer().emitMovSP(SPReg, Offset);
+ UC.saveFPReg(SPReg);
+
+ return false;
+}
+
+/// parseDirectiveObjectArch
+/// ::= .object_arch name
+bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(getLexer().getLoc(), "unexpected token");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ StringRef Arch = Parser.getTok().getString();
+ SMLoc ArchLoc = Parser.getTok().getLoc();
+ getLexer().Lex();
+
+ unsigned ID = ARM::parseArch(Arch);
+
+ if (ID == ARM::AK_INVALID) {
+ Error(ArchLoc, "unknown architecture '" + Arch + "'");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ getTargetStreamer().emitObjectArch(ID);
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(getLexer().getLoc(), "unexpected token");
+ Parser.eatToEndOfStatement();
+ }
+
+ return false;
+}
+
+/// parseDirectiveAlign
+/// ::= .align
+bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
+ // NOTE: if this is not the end of the statement, fall back to the target
+ // agnostic handling for this directive which will correctly handle this.
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return true;
+
+ // '.align' is target specifically handled to mean 2**2 byte alignment.
+ if (getStreamer().getCurrentSection().first->UseCodeAlign())
+ getStreamer().EmitCodeAlignment(4, 0);
+ else
+ getStreamer().EmitValueToAlignment(4, 0, 1, 0);
+
+ return false;
+}
+
+/// parseDirectiveThumbSet
+/// ::= .thumb_set name, value
+bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
+ StringRef Name;
+ if (Parser.parseIdentifier(Name)) {
+ TokError("expected identifier after '.thumb_set'");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ TokError("expected comma after name '" + Name + "'");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ Lex();
+
+ MCSymbol *Sym;
+ const MCExpr *Value;
+ if (MCParserUtils::parseAssignmentExpression(Name, /* allow_redef */ true,
+ Parser, Sym, Value))
+ return true;
+
+ getTargetStreamer().emitThumbSet(Sym, Value);
+ return false;
+}
+
+/// Force static initialization.
+extern "C" void LLVMInitializeARMAsmParser() {
+ RegisterMCAsmParser<ARMAsmParser> X(TheARMLETarget);
+ RegisterMCAsmParser<ARMAsmParser> Y(TheARMBETarget);
+ RegisterMCAsmParser<ARMAsmParser> A(TheThumbLETarget);
+ RegisterMCAsmParser<ARMAsmParser> B(TheThumbBETarget);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
+#define GET_MATCHER_IMPLEMENTATION
+#include "ARMGenAsmMatcher.inc"
+
+// FIXME: This structure should be moved inside ARMTargetParser
+// when we start to table-generate them, and we can use the ARM
+// flags below, that were generated by table-gen.
+static const struct {
+ const unsigned Kind;
+ const uint64_t ArchCheck;
+ const FeatureBitset Features;
+} Extensions[] = {
+ { ARM::AEK_CRC, Feature_HasV8, {ARM::FeatureCRC} },
+ { ARM::AEK_CRYPTO, Feature_HasV8,
+ {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
+ { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} },
+ { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
+ {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} },
+ { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
+ { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
+ { ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} },
+ // FIXME: Only available in A-class, isel not predicated
+ { ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} },
+ { ARM::AEK_FP16, Feature_HasV8_2a, {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} },
+ // FIXME: Unsupported extensions.
+ { ARM::AEK_OS, Feature_None, {} },
+ { ARM::AEK_IWMMXT, Feature_None, {} },
+ { ARM::AEK_IWMMXT2, Feature_None, {} },
+ { ARM::AEK_MAVERICK, Feature_None, {} },
+ { ARM::AEK_XSCALE, Feature_None, {} },
+};
+
+/// parseDirectiveArchExtension
+/// ::= .arch_extension [no]feature
+bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(getLexer().getLoc(), "unexpected token");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ StringRef Name = Parser.getTok().getString();
+ SMLoc ExtLoc = Parser.getTok().getLoc();
+ getLexer().Lex();
+
+ bool EnableFeature = true;
+ if (Name.startswith_lower("no")) {
+ EnableFeature = false;
+ Name = Name.substr(2);
+ }
+ unsigned FeatureKind = ARM::parseArchExt(Name);
+ if (FeatureKind == ARM::AEK_INVALID)
+ Error(ExtLoc, "unknown architectural extension: " + Name);
+
+ for (const auto &Extension : Extensions) {
+ if (Extension.Kind != FeatureKind)
+ continue;
+
+ if (Extension.Features.none())
+ report_fatal_error("unsupported architectural extension: " + Name);
+
+ if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck) {
+ Error(ExtLoc, "architectural extension '" + Name + "' is not "
+ "allowed for the current base architecture");
+ return false;
+ }
+
+ MCSubtargetInfo &STI = copySTI();
+ FeatureBitset ToggleFeatures = EnableFeature
+ ? (~STI.getFeatureBits() & Extension.Features)
+ : ( STI.getFeatureBits() & Extension.Features);
+
+ uint64_t Features =
+ ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
+ setAvailableFeatures(Features);
+ return false;
+ }
+
+ Error(ExtLoc, "unknown architectural extension: " + Name);
+ Parser.eatToEndOfStatement();
+ return false;
+}
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
+ unsigned Kind) {
+ ARMOperand &Op = static_cast<ARMOperand &>(AsmOp);
+ // If the kind is a token for a literal immediate, check if our asm
+ // operand matches. This is for InstAliases which have a fixed-value
+ // immediate in the syntax.
+ switch (Kind) {
+ default: break;
+ case MCK__35_0:
+ if (Op.isImm())
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()))
+ if (CE->getValue() == 0)
+ return Match_Success;
+ break;
+ case MCK_ModImm:
+ if (Op.isImm()) {
+ const MCExpr *SOExpr = Op.getImm();
+ int64_t Value;
+ if (!SOExpr->evaluateAsAbsolute(Value))
+ return Match_Success;
+ assert((Value >= INT32_MIN && Value <= UINT32_MAX) &&
+ "expression value must be representable in 32 bits");
+ }
+ break;
+ case MCK_rGPR:
+ if (hasV8Ops() && Op.isReg() && Op.getReg() == ARM::SP)
+ return Match_Success;
+ break;
+ case MCK_GPRPair:
+ if (Op.isReg() &&
+ MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg()))
+ return Match_Success;
+ break;
+ }
+ return Match_InvalidOperand;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
new file mode 100644
index 0000000..e63defe
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -0,0 +1,5223 @@
+//===-- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDisassembler.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+ // Handles the condition code status of instructions in IT blocks
+ class ITStatus
+ {
+ public:
+ // Returns the condition code for instruction in IT block
+ unsigned getITCC() {
+ unsigned CC = ARMCC::AL;
+ if (instrInITBlock())
+ CC = ITStates.back();
+ return CC;
+ }
+
+ // Advances the IT block state to the next T or E
+ void advanceITState() {
+ ITStates.pop_back();
+ }
+
+ // Returns true if the current instruction is in an IT block
+ bool instrInITBlock() {
+ return !ITStates.empty();
+ }
+
+ // Returns true if current instruction is the last instruction in an IT block
+ bool instrLastInITBlock() {
+ return ITStates.size() == 1;
+ }
+
+ // Called when decoding an IT instruction. Sets the IT state for the following
+ // instructions that for the IT block. Firstcond and Mask correspond to the
+ // fields in the IT instruction encoding.
+ void setITState(char Firstcond, char Mask) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned CondBit0 = Firstcond & 1;
+ unsigned NumTZ = countTrailingZeros<uint8_t>(Mask);
+ unsigned char CCBits = static_cast<unsigned char>(Firstcond & 0xf);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ // push condition codes onto the stack the correct order for the pops
+ for (unsigned Pos = NumTZ+1; Pos <= 3; ++Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ ITStates.push_back(CCBits);
+ else
+ ITStates.push_back(CCBits ^ 1);
+ }
+ ITStates.push_back(CCBits);
+ }
+
+ private:
+ std::vector<unsigned char> ITStates;
+ };
+}
+
+namespace {
+/// ARM disassembler for all ARM platforms.
+class ARMDisassembler : public MCDisassembler {
+public:
+ ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
+ MCDisassembler(STI, Ctx) {
+ }
+
+ ~ARMDisassembler() override {}
+
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+};
+
+/// Thumb disassembler for all Thumb platforms.
+class ThumbDisassembler : public MCDisassembler {
+public:
+ ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
+ MCDisassembler(STI, Ctx) {
+ }
+
+ ~ThumbDisassembler() override {}
+
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+
+private:
+ mutable ITStatus ITBlock;
+ DecodeStatus AddThumbPredicate(MCInst&) const;
+ void UpdateThumbVFPPredicate(MCInst&) const;
+};
+}
+
+static bool Check(DecodeStatus &Out, DecodeStatus In) {
+ switch (In) {
+ case MCDisassembler::Success:
+ // Out stays the same.
+ return true;
+ case MCDisassembler::SoftFail:
+ Out = In;
+ return true;
+ case MCDisassembler::Fail:
+ Out = In;
+ return false;
+ }
+ llvm_unreachable("Invalid DecodeStatus!");
+}
+
+
+// Forward declare these because the autogenerated code will reference them.
+// Definitions are further down.
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst,
+ unsigned Insn,
+ uint64_t Adddress,
+ const void *Decoder);
+static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+
+static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder);
+static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder);
+static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder);
+static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder);
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+#include "ARMGenDisassemblerTables.inc"
+
+static MCDisassembler *createARMDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new ARMDisassembler(STI, Ctx);
+}
+
+static MCDisassembler *createThumbDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new ThumbDisassembler(STI, Ctx);
+}
+
+// Post-decoding checks
+static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
+ uint64_t Address, raw_ostream &OS,
+ raw_ostream &CS,
+ uint32_t Insn,
+ DecodeStatus Result)
+{
+ switch (MI.getOpcode()) {
+ case ARM::HVC: {
+ // HVC is undefined if condition = 0xf otherwise upredictable
+ // if condition != 0xe
+ uint32_t Cond = (Insn >> 28) & 0xF;
+ if (Cond == 0xF)
+ return MCDisassembler::Fail;
+ if (Cond != 0xE)
+ return MCDisassembler::SoftFail;
+ return Result;
+ }
+ default: return Result;
+ }
+}
+
+DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &OS,
+ raw_ostream &CS) const {
+ CommentStream = &CS;
+
+ assert(!STI.getFeatureBits()[ARM::ModeThumb] &&
+ "Asked to disassemble an ARM instruction but Subtarget is in Thumb "
+ "mode!");
+
+ // We want to read exactly 4 bytes of data.
+ if (Bytes.size() < 4) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // Encoded as a small-endian 32-bit word in the stream.
+ uint32_t Insn =
+ (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
+
+ // Calling the auto-generated decoder function.
+ DecodeStatus Result =
+ decodeInstruction(DecoderTableARM32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result);
+ }
+
+ // VFP and NEON instructions, similarly, are shared between ARM
+ // and Thumb modes.
+ Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+
+ Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+
+ Result =
+ decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return Result;
+ }
+
+ Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return Result;
+ }
+
+ Result =
+ decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return Result;
+ }
+
+ Result =
+ decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+
+ Result =
+ decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+namespace llvm {
+extern const MCInstrDesc ARMInsts[];
+}
+
+/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
+/// immediate Value in the MCInst. The immediate Value has had any PC
+/// adjustment made by the caller. If the instruction is a branch instruction
+/// then isBranch is true, else false. If the getOpInfo() function was set as
+/// part of the setupForSymbolicDisassembly() call then that function is called
+/// to get any symbolic information at the Address for this instruction. If
+/// that returns non-zero then the symbolic information it returns is used to
+/// create an MCExpr and that is added as an operand to the MCInst. If
+/// getOpInfo() returns zero and isBranch is true then a symbol look up for
+/// Value is done and if a symbol is found an MCExpr is created with that, else
+/// an MCExpr with Value is created. This function returns true if it adds an
+/// operand to the MCInst and false otherwise.
+static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
+ bool isBranch, uint64_t InstSize,
+ MCInst &MI, const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ // FIXME: Does it make sense for value to be negative?
+ return Dis->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address, isBranch,
+ /* Offset */ 0, InstSize);
+}
+
+/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
+/// referenced by a load instruction with the base register that is the Pc.
+/// These can often be values in a literal pool near the Address of the
+/// instruction. The Address of the instruction and its immediate Value are
+/// used as a possible literal pool entry. The SymbolLookUp call back will
+/// return the name of a symbol referenced by the literal pool's entry if
+/// the referenced address is that of a symbol. Or it will return a pointer to
+/// a literal 'C' string if the referenced address of the literal pool's entry
+/// is an address into a section with 'C' string literals.
+static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
+ const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ Dis->tryAddingPcLoadReferenceComment(Value, Address);
+}
+
+// Thumb1 instructions don't have explicit S bits. Rather, they
+// implicitly set CPSR. Since it's not represented in the encoding, the
+// auto-generated decoder won't inject the CPSR operand. We need to fix
+// that as a post-pass.
+static void AddThumb1SBit(MCInst &MI, bool InITBlock) {
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ MCInst::iterator I = MI.begin();
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (I == MI.end()) break;
+ if (OpInfo[i].isOptionalDef() && OpInfo[i].RegClass == ARM::CCRRegClassID) {
+ if (i > 0 && OpInfo[i-1].isPredicate()) continue;
+ MI.insert(I, MCOperand::createReg(InITBlock ? 0 : ARM::CPSR));
+ return;
+ }
+ }
+
+ MI.insert(I, MCOperand::createReg(InITBlock ? 0 : ARM::CPSR));
+}
+
+// Most Thumb instructions don't have explicit predicates in the
+// encoding, but rather get their predicates from IT context. We need
+// to fix up the predicate operands using this context information as a
+// post-pass.
+MCDisassembler::DecodeStatus
+ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
+ MCDisassembler::DecodeStatus S = Success;
+
+ // A few instructions actually have predicates encoded in them. Don't
+ // try to overwrite it if we're seeing one of those.
+ switch (MI.getOpcode()) {
+ case ARM::tBcc:
+ case ARM::t2Bcc:
+ case ARM::tCBZ:
+ case ARM::tCBNZ:
+ case ARM::tCPS:
+ case ARM::t2CPS3p:
+ case ARM::t2CPS2p:
+ case ARM::t2CPS1p:
+ case ARM::tMOVSr:
+ case ARM::tSETEND:
+ // Some instructions (mostly conditional branches) are not
+ // allowed in IT blocks.
+ if (ITBlock.instrInITBlock())
+ S = SoftFail;
+ else
+ return Success;
+ break;
+ case ARM::tB:
+ case ARM::t2B:
+ case ARM::t2TBB:
+ case ARM::t2TBH:
+ // Some instructions (mostly unconditional branches) can
+ // only appears at the end of, or outside of, an IT.
+ if (ITBlock.instrInITBlock() && !ITBlock.instrLastInITBlock())
+ S = SoftFail;
+ break;
+ default:
+ break;
+ }
+
+ // If we're in an IT block, base the predicate on that. Otherwise,
+ // assume a predicate of AL.
+ unsigned CC;
+ CC = ITBlock.getITCC();
+ if (CC == 0xF)
+ CC = ARMCC::AL;
+ if (ITBlock.instrInITBlock())
+ ITBlock.advanceITState();
+
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ MCInst::iterator I = MI.begin();
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (I == MI.end()) break;
+ if (OpInfo[i].isPredicate()) {
+ I = MI.insert(I, MCOperand::createImm(CC));
+ ++I;
+ if (CC == ARMCC::AL)
+ MI.insert(I, MCOperand::createReg(0));
+ else
+ MI.insert(I, MCOperand::createReg(ARM::CPSR));
+ return S;
+ }
+ }
+
+ I = MI.insert(I, MCOperand::createImm(CC));
+ ++I;
+ if (CC == ARMCC::AL)
+ MI.insert(I, MCOperand::createReg(0));
+ else
+ MI.insert(I, MCOperand::createReg(ARM::CPSR));
+
+ return S;
+}
+
+// Thumb VFP instructions are a special case. Because we share their
+// encodings between ARM and Thumb modes, and they are predicable in ARM
+// mode, the auto-generated decoder will give them an (incorrect)
+// predicate operand. We need to rewrite these operands based on the IT
+// context as a post-pass.
+void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
+ unsigned CC;
+ CC = ITBlock.getITCC();
+ if (ITBlock.instrInITBlock())
+ ITBlock.advanceITState();
+
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ MCInst::iterator I = MI.begin();
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (OpInfo[i].isPredicate() ) {
+ I->setImm(CC);
+ ++I;
+ if (CC == ARMCC::AL)
+ I->setReg(0);
+ else
+ I->setReg(ARM::CPSR);
+ return;
+ }
+ }
+}
+
+DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &OS,
+ raw_ostream &CS) const {
+ CommentStream = &CS;
+
+ assert(STI.getFeatureBits()[ARM::ModeThumb] &&
+ "Asked to disassemble in Thumb mode but Subtarget is in ARM mode!");
+
+ // We want to read exactly 2 bytes of data.
+ if (Bytes.size() < 2) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0];
+ DecodeStatus Result =
+ decodeInstruction(DecoderTableThumb16, MI, Insn16, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 2;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
+ }
+
+ Result = decodeInstruction(DecoderTableThumbSBit16, MI, Insn16, Address, this,
+ STI);
+ if (Result) {
+ Size = 2;
+ bool InITBlock = ITBlock.instrInITBlock();
+ Check(Result, AddThumbPredicate(MI));
+ AddThumb1SBit(MI, InITBlock);
+ return Result;
+ }
+
+ Result =
+ decodeInstruction(DecoderTableThumb216, MI, Insn16, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 2;
+
+ // Nested IT blocks are UNPREDICTABLE. Must be checked before we add
+ // the Thumb predicate.
+ if (MI.getOpcode() == ARM::t2IT && ITBlock.instrInITBlock())
+ Result = MCDisassembler::SoftFail;
+
+ Check(Result, AddThumbPredicate(MI));
+
+ // If we find an IT instruction, we need to parse its condition
+ // code and mask operands so that we can apply them correctly
+ // to the subsequent instructions.
+ if (MI.getOpcode() == ARM::t2IT) {
+
+ unsigned Firstcond = MI.getOperand(0).getImm();
+ unsigned Mask = MI.getOperand(1).getImm();
+ ITBlock.setITState(Firstcond, Mask);
+ }
+
+ return Result;
+ }
+
+ // We want to read exactly 4 bytes of data.
+ if (Bytes.size() < 4) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ uint32_t Insn32 =
+ (Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16);
+ Result =
+ decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ bool InITBlock = ITBlock.instrInITBlock();
+ Check(Result, AddThumbPredicate(MI));
+ AddThumb1SBit(MI, InITBlock);
+ return Result;
+ }
+
+ Result =
+ decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
+ }
+
+ if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
+ Result =
+ decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ UpdateThumbVFPPredicate(MI);
+ return Result;
+ }
+ }
+
+ Result =
+ decodeInstruction(DecoderTableVFPV832, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+
+ if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
+ Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn32, Address, this,
+ STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
+ }
+ }
+
+ if (fieldFromInstruction(Insn32, 24, 8) == 0xF9) {
+ uint32_t NEONLdStInsn = Insn32;
+ NEONLdStInsn &= 0xF0FFFFFF;
+ NEONLdStInsn |= 0x04000000;
+ Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn,
+ Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
+ }
+ }
+
+ if (fieldFromInstruction(Insn32, 24, 4) == 0xF) {
+ uint32_t NEONDataInsn = Insn32;
+ NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
+ NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
+ NEONDataInsn |= 0x12000000; // Set bits 28 and 25
+ Result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn,
+ Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
+ }
+
+ uint32_t NEONCryptoInsn = Insn32;
+ NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24
+ NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
+ NEONCryptoInsn |= 0x12000000; // Set bits 28 and 25
+ Result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn,
+ Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+
+ uint32_t NEONv8Insn = Insn32;
+ NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
+ Result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+ }
+
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+
+extern "C" void LLVMInitializeARMDisassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheARMLETarget,
+ createARMDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheARMBETarget,
+ createARMDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheThumbLETarget,
+ createThumbDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheThumbBETarget,
+ createThumbDisassembler);
+}
+
+static const uint16_t GPRDecoderTable[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+ ARM::R12, ARM::SP, ARM::LR, ARM::PC
+};
+
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Register = GPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo == 15)
+ S = MCDisassembler::SoftFail;
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+
+ return S;
+}
+
+static DecodeStatus
+DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo == 15)
+ {
+ Inst.addOperand(MCOperand::createReg(ARM::APSR_NZCV));
+ return MCDisassembler::Success;
+ }
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+ return S;
+}
+
+static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static const uint16_t GPRPairDecoderTable[] = {
+ ARM::R0_R1, ARM::R2_R3, ARM::R4_R5, ARM::R6_R7,
+ ARM::R8_R9, ARM::R10_R11, ARM::R12_SP
+};
+
+static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo > 13)
+ return MCDisassembler::Fail;
+
+ if ((RegNo & 1) || RegNo == 0xe)
+ S = MCDisassembler::SoftFail;
+
+ unsigned RegisterPair = GPRPairDecoderTable[RegNo/2];
+ Inst.addOperand(MCOperand::createReg(RegisterPair));
+ return S;
+}
+
+static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ unsigned Register = 0;
+ switch (RegNo) {
+ case 0:
+ Register = ARM::R0;
+ break;
+ case 1:
+ Register = ARM::R1;
+ break;
+ case 2:
+ Register = ARM::R2;
+ break;
+ case 3:
+ Register = ARM::R3;
+ break;
+ case 9:
+ Register = ARM::R9;
+ break;
+ case 12:
+ Register = ARM::R12;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ if ((RegNo == 13 && !featureBits[ARM::HasV8Ops]) || RegNo == 15)
+ S = MCDisassembler::SoftFail;
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+ return S;
+}
+
+static const uint16_t SPRDecoderTable[] = {
+ ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15,
+ ARM::S16, ARM::S17, ARM::S18, ARM::S19,
+ ARM::S20, ARM::S21, ARM::S22, ARM::S23,
+ ARM::S24, ARM::S25, ARM::S26, ARM::S27,
+ ARM::S28, ARM::S29, ARM::S30, ARM::S31
+};
+
+static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Register = SPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t DPRDecoderTable[] = {
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10, ARM::D11,
+ ARM::D12, ARM::D13, ARM::D14, ARM::D15,
+ ARM::D16, ARM::D17, ARM::D18, ARM::D19,
+ ARM::D20, ARM::D21, ARM::D22, ARM::D23,
+ ARM::D24, ARM::D25, ARM::D26, ARM::D27,
+ ARM::D28, ARM::D29, ARM::D30, ARM::D31
+};
+
+static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ bool hasD16 = featureBits[ARM::FeatureD16];
+
+ if (RegNo > 31 || (hasD16 && RegNo > 15))
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus
+DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+ return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static const uint16_t QPRDecoderTable[] = {
+ ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+ ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7,
+ ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
+ ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15
+};
+
+
+static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31 || (RegNo & 1) != 0)
+ return MCDisassembler::Fail;
+ RegNo >>= 1;
+
+ unsigned Register = QPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t DPairDecoderTable[] = {
+ ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6,
+ ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12,
+ ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18,
+ ARM::Q9, ARM::D19_D20, ARM::Q10, ARM::D21_D22, ARM::Q11, ARM::D23_D24,
+ ARM::Q12, ARM::D25_D26, ARM::Q13, ARM::D27_D28, ARM::Q14, ARM::D29_D30,
+ ARM::Q15
+};
+
+static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 30)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPairDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t DPairSpacedDecoderTable[] = {
+ ARM::D0_D2, ARM::D1_D3, ARM::D2_D4, ARM::D3_D5,
+ ARM::D4_D6, ARM::D5_D7, ARM::D6_D8, ARM::D7_D9,
+ ARM::D8_D10, ARM::D9_D11, ARM::D10_D12, ARM::D11_D13,
+ ARM::D12_D14, ARM::D13_D15, ARM::D14_D16, ARM::D15_D17,
+ ARM::D16_D18, ARM::D17_D19, ARM::D18_D20, ARM::D19_D21,
+ ARM::D20_D22, ARM::D21_D23, ARM::D22_D24, ARM::D23_D25,
+ ARM::D24_D26, ARM::D25_D27, ARM::D26_D28, ARM::D27_D29,
+ ARM::D28_D30, ARM::D29_D31
+};
+
+static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 29)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPairSpacedDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val == 0xF) return MCDisassembler::Fail;
+ // AL predicate is not allowed on Thumb1 branches.
+ if (Inst.getOpcode() == ARM::tBcc && Val == 0xE)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(Val));
+ if (Val == ARMCC::AL) {
+ Inst.addOperand(MCOperand::createReg(0));
+ } else
+ Inst.addOperand(MCOperand::createReg(ARM::CPSR));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val)
+ Inst.addOperand(MCOperand::createReg(ARM::CPSR));
+ else
+ Inst.addOperand(MCOperand::createReg(0));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned type = fieldFromInstruction(Val, 5, 2);
+ unsigned imm = fieldFromInstruction(Val, 7, 5);
+
+ // Register-immediate
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ Shift = ARM_AM::lsl;
+ break;
+ case 1:
+ Shift = ARM_AM::lsr;
+ break;
+ case 2:
+ Shift = ARM_AM::asr;
+ break;
+ case 3:
+ Shift = ARM_AM::ror;
+ break;
+ }
+
+ if (Shift == ARM_AM::ror && imm == 0)
+ Shift = ARM_AM::rrx;
+
+ unsigned Op = Shift | (imm << 3);
+ Inst.addOperand(MCOperand::createImm(Op));
+
+ return S;
+}
+
+static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned type = fieldFromInstruction(Val, 5, 2);
+ unsigned Rs = fieldFromInstruction(Val, 8, 4);
+
+ // Register-register
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rs, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ Shift = ARM_AM::lsl;
+ break;
+ case 1:
+ Shift = ARM_AM::lsr;
+ break;
+ case 2:
+ Shift = ARM_AM::asr;
+ break;
+ case 3:
+ Shift = ARM_AM::ror;
+ break;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Shift));
+
+ return S;
+}
+
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ bool NeedDisjointWriteback = false;
+ unsigned WritebackReg = 0;
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ NeedDisjointWriteback = true;
+ WritebackReg = Inst.getOperand(0).getReg();
+ break;
+ }
+
+ // Empty register lists are not allowed.
+ if (Val == 0) return MCDisassembler::Fail;
+ for (unsigned i = 0; i < 16; ++i) {
+ if (Val & (1 << i)) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Writeback not allowed if Rn is in the target list.
+ if (NeedDisjointWriteback && WritebackReg == Inst.end()[-1].getReg())
+ Check(S, MCDisassembler::SoftFail);
+ }
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Vd = fieldFromInstruction(Val, 8, 5);
+ unsigned regs = fieldFromInstruction(Val, 0, 8);
+
+ // In case of unpredictable encoding, tweak the operands.
+ if (regs == 0 || (Vd + regs) > 32) {
+ regs = Vd + regs > 32 ? 32 - Vd : regs;
+ regs = std::max( 1u, regs);
+ S = MCDisassembler::SoftFail;
+ }
+
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ for (unsigned i = 0; i < (regs - 1); ++i) {
+ if (!Check(S, DecodeSPRRegisterClass(Inst, ++Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Vd = fieldFromInstruction(Val, 8, 5);
+ unsigned regs = fieldFromInstruction(Val, 1, 7);
+
+ // In case of unpredictable encoding, tweak the operands.
+ if (regs == 0 || regs > 16 || (Vd + regs) > 32) {
+ regs = Vd + regs > 32 ? 32 - Vd : regs;
+ regs = std::max( 1u, regs);
+ regs = std::min(16u, regs);
+ S = MCDisassembler::SoftFail;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ for (unsigned i = 0; i < (regs - 1); ++i) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, ++Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ // This operand encodes a mask of contiguous zeros between a specified MSB
+ // and LSB. To decode it, we create the mask of all bits MSB-and-lower,
+ // the mask of all bits LSB-and-lower, and then xor them to create
+ // the mask of that's all ones on [msb, lsb]. Finally we not it to
+ // create the final mask.
+ unsigned msb = fieldFromInstruction(Val, 5, 5);
+ unsigned lsb = fieldFromInstruction(Val, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (lsb > msb) {
+ Check(S, MCDisassembler::SoftFail);
+ // The check above will cause the warning for the "potentially undefined
+ // instruction encoding" but we can't build a bad MCOperand value here
+ // with a lsb > msb or else printing the MCInst will cause a crash.
+ lsb = msb;
+ }
+
+ uint32_t msb_mask = 0xFFFFFFFF;
+ if (msb != 31) msb_mask = (1U << (msb+1)) - 1;
+ uint32_t lsb_mask = (1U << lsb) - 1;
+
+ Inst.addOperand(MCOperand::createImm(~(msb_mask ^ lsb_mask)));
+ return S;
+}
+
+static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned CRd = fieldFromInstruction(Insn, 12, 4);
+ unsigned coproc = fieldFromInstruction(Insn, 8, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDC_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDC_POST:
+ case ARM::LDC_OPTION:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDCL_PRE:
+ case ARM::LDCL_POST:
+ case ARM::LDCL_OPTION:
+ case ARM::STC_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STC_POST:
+ case ARM::STC_OPTION:
+ case ARM::STCL_OFFSET:
+ case ARM::STCL_PRE:
+ case ARM::STCL_POST:
+ case ARM::STCL_OPTION:
+ case ARM::t2LDC_OFFSET:
+ case ARM::t2LDC_PRE:
+ case ARM::t2LDC_POST:
+ case ARM::t2LDC_OPTION:
+ case ARM::t2LDCL_OFFSET:
+ case ARM::t2LDCL_PRE:
+ case ARM::t2LDCL_POST:
+ case ARM::t2LDCL_OPTION:
+ case ARM::t2STC_OFFSET:
+ case ARM::t2STC_PRE:
+ case ARM::t2STC_POST:
+ case ARM::t2STC_OPTION:
+ case ARM::t2STCL_OFFSET:
+ case ARM::t2STCL_PRE:
+ case ARM::t2STCL_POST:
+ case ARM::t2STCL_OPTION:
+ if (coproc == 0xA || coproc == 0xB)
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+ if (featureBits[ARM::HasV8Ops] && (coproc != 14))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(coproc));
+ Inst.addOperand(MCOperand::createImm(CRd));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDC2_OFFSET:
+ case ARM::t2LDC2L_OFFSET:
+ case ARM::t2LDC2_PRE:
+ case ARM::t2LDC2L_PRE:
+ case ARM::t2STC2_OFFSET:
+ case ARM::t2STC2L_OFFSET:
+ case ARM::t2STC2_PRE:
+ case ARM::t2STC2L_PRE:
+ case ARM::LDC2_OFFSET:
+ case ARM::LDC2L_OFFSET:
+ case ARM::LDC2_PRE:
+ case ARM::LDC2L_PRE:
+ case ARM::STC2_OFFSET:
+ case ARM::STC2L_OFFSET:
+ case ARM::STC2_PRE:
+ case ARM::STC2L_PRE:
+ case ARM::t2LDC_OFFSET:
+ case ARM::t2LDCL_OFFSET:
+ case ARM::t2LDC_PRE:
+ case ARM::t2LDCL_PRE:
+ case ARM::t2STC_OFFSET:
+ case ARM::t2STCL_OFFSET:
+ case ARM::t2STC_PRE:
+ case ARM::t2STCL_PRE:
+ case ARM::LDC_OFFSET:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDCL_PRE:
+ case ARM::STC_OFFSET:
+ case ARM::STCL_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STCL_PRE:
+ imm = ARM_AM::getAM5Opc(U ? ARM_AM::add : ARM_AM::sub, imm);
+ Inst.addOperand(MCOperand::createImm(imm));
+ break;
+ case ARM::t2LDC2_POST:
+ case ARM::t2LDC2L_POST:
+ case ARM::t2STC2_POST:
+ case ARM::t2STC2L_POST:
+ case ARM::LDC2_POST:
+ case ARM::LDC2L_POST:
+ case ARM::STC2_POST:
+ case ARM::STC2L_POST:
+ case ARM::t2LDC_POST:
+ case ARM::t2LDCL_POST:
+ case ARM::t2STC_POST:
+ case ARM::t2STCL_POST:
+ case ARM::LDC_POST:
+ case ARM::LDCL_POST:
+ case ARM::STC_POST:
+ case ARM::STCL_POST:
+ imm |= U << 8;
+ // fall through.
+ default:
+ // The 'option' variant doesn't encode 'U' in the immediate since
+ // the immediate is unsigned [0,255].
+ Inst.addOperand(MCOperand::createImm(imm));
+ break;
+ }
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDC_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDC_POST:
+ case ARM::LDC_OPTION:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDCL_PRE:
+ case ARM::LDCL_POST:
+ case ARM::LDCL_OPTION:
+ case ARM::STC_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STC_POST:
+ case ARM::STC_OPTION:
+ case ARM::STCL_OFFSET:
+ case ARM::STCL_PRE:
+ case ARM::STCL_POST:
+ case ARM::STCL_OPTION:
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus
+DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned reg = fieldFromInstruction(Insn, 25, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+
+ // On stores, the writeback operand precedes Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRT_POST_REG:
+ case ARM::STRT_POST_IMM:
+ case ARM::STRBT_POST_REG:
+ case ARM::STRBT_POST_IMM:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // On loads, the writeback operand comes after Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRBT_POST_REG:
+ case ARM::LDRBT_POST_IMM:
+ case ARM::LDRT_POST_REG:
+ case ARM::LDRT_POST_IMM:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::AddrOpc Op = ARM_AM::add;
+ if (!fieldFromInstruction(Insn, 23, 1))
+ Op = ARM_AM::sub;
+
+ bool writeback = (P == 0) || (W == 1);
+ unsigned idx_mode = 0;
+ if (P && writeback)
+ idx_mode = ARMII::IndexModePre;
+ else if (!P && writeback)
+ idx_mode = ARMII::IndexModePost;
+
+ if (writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail; // UNPREDICTABLE
+
+ if (reg) {
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ ARM_AM::ShiftOpc Opc = ARM_AM::lsl;
+ switch( fieldFromInstruction(Insn, 5, 2)) {
+ case 0:
+ Opc = ARM_AM::lsl;
+ break;
+ case 1:
+ Opc = ARM_AM::lsr;
+ break;
+ case 2:
+ Opc = ARM_AM::asr;
+ break;
+ case 3:
+ Opc = ARM_AM::ror;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ unsigned amt = fieldFromInstruction(Insn, 7, 5);
+ if (Opc == ARM_AM::ror && amt == 0)
+ Opc = ARM_AM::rrx;
+ unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);
+
+ Inst.addOperand(MCOperand::createImm(imm));
+ } else {
+ Inst.addOperand(MCOperand::createReg(0));
+ unsigned tmp = ARM_AM::getAM2Opc(Op, imm, ARM_AM::lsl, idx_mode);
+ Inst.addOperand(MCOperand::createImm(tmp));
+ }
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 13, 4);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned type = fieldFromInstruction(Val, 5, 2);
+ unsigned imm = fieldFromInstruction(Val, 7, 5);
+ unsigned U = fieldFromInstruction(Val, 12, 1);
+
+ ARM_AM::ShiftOpc ShOp = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ ShOp = ARM_AM::lsl;
+ break;
+ case 1:
+ ShOp = ARM_AM::lsr;
+ break;
+ case 2:
+ ShOp = ARM_AM::asr;
+ break;
+ case 3:
+ ShOp = ARM_AM::ror;
+ break;
+ }
+
+ if (ShOp == ARM_AM::ror && imm == 0)
+ ShOp = ARM_AM::rrx;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ unsigned shift;
+ if (U)
+ shift = ARM_AM::getAM2Opc(ARM_AM::add, imm, ShOp);
+ else
+ shift = ARM_AM::getAM2Opc(ARM_AM::sub, imm, ShOp);
+ Inst.addOperand(MCOperand::createImm(shift));
+
+ return S;
+}
+
+static DecodeStatus
+DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned type = fieldFromInstruction(Insn, 22, 1);
+ unsigned imm = fieldFromInstruction(Insn, 8, 4);
+ unsigned U = ((~fieldFromInstruction(Insn, 23, 1)) & 1) << 8;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
+ unsigned Rt2 = Rt + 1;
+
+ bool writeback = (W == 1) | (P == 0);
+
+ // For {LD,ST}RD, Rt must be even, else undefined.
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (Rt & 0x1) S = MCDisassembler::SoftFail;
+ break;
+ default:
+ break;
+ }
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ if (P == 0 && W == 1)
+ S = MCDisassembler::SoftFail;
+
+ if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2))
+ S = MCDisassembler::SoftFail;
+ if (type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ if (Rt2 == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && fieldFromInstruction(Insn, 8, 4))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::STRH:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ if (writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
+ if (!type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (type && Rn == 15){
+ if (Rt2 == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (P == 0 && W == 1)
+ S = MCDisassembler::SoftFail;
+ if (!type && (Rt2 == 15 || Rm == 15 || Rm == Rt || Rm == Rt2))
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && Rn == 15)
+ S = MCDisassembler::SoftFail;
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRH:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ if (type && Rn == 15){
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRSH:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRSB:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ if (type && Rn == 15){
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (type && (Rt == 15 || (writeback && Rn == Rt)))
+ S = MCDisassembler::SoftFail;
+ if (!type && (Rt == 15 || Rm == 15))
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
+ break;
+ default:
+ break;
+ }
+
+ if (writeback) { // Writeback
+ if (P)
+ U |= ARMII::IndexModePre << 9;
+ else
+ U |= ARMII::IndexModePost << 9;
+
+ // On stores, the writeback operand precedes Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::STRH:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (writeback) {
+ // On loads, the writeback operand comes after Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ case ARM::LDRH:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ case ARM::LDRSH:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRSB:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ case ARM::LDRHTr:
+ case ARM::LDRSBTr:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (type) {
+ Inst.addOperand(MCOperand::createReg(0));
+ Inst.addOperand(MCOperand::createImm(U | (imm << 4) | Rm));
+ } else {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(U));
+ }
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned mode = fieldFromInstruction(Insn, 23, 2);
+
+ switch (mode) {
+ case 0:
+ mode = ARM_AM::da;
+ break;
+ case 1:
+ mode = ARM_AM::ia;
+ break;
+ case 2:
+ mode = ARM_AM::db;
+ break;
+ case 3:
+ mode = ARM_AM::ib;
+ break;
+ }
+
+ Inst.addOperand(MCOperand::createImm(mode));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
+static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned reglist = fieldFromInstruction(Insn, 0, 16);
+
+ if (pred == 0xF) {
+ // Ambiguous with RFE and SRS
+ switch (Inst.getOpcode()) {
+ case ARM::LDMDA:
+ Inst.setOpcode(ARM::RFEDA);
+ break;
+ case ARM::LDMDA_UPD:
+ Inst.setOpcode(ARM::RFEDA_UPD);
+ break;
+ case ARM::LDMDB:
+ Inst.setOpcode(ARM::RFEDB);
+ break;
+ case ARM::LDMDB_UPD:
+ Inst.setOpcode(ARM::RFEDB_UPD);
+ break;
+ case ARM::LDMIA:
+ Inst.setOpcode(ARM::RFEIA);
+ break;
+ case ARM::LDMIA_UPD:
+ Inst.setOpcode(ARM::RFEIA_UPD);
+ break;
+ case ARM::LDMIB:
+ Inst.setOpcode(ARM::RFEIB);
+ break;
+ case ARM::LDMIB_UPD:
+ Inst.setOpcode(ARM::RFEIB_UPD);
+ break;
+ case ARM::STMDA:
+ Inst.setOpcode(ARM::SRSDA);
+ break;
+ case ARM::STMDA_UPD:
+ Inst.setOpcode(ARM::SRSDA_UPD);
+ break;
+ case ARM::STMDB:
+ Inst.setOpcode(ARM::SRSDB);
+ break;
+ case ARM::STMDB_UPD:
+ Inst.setOpcode(ARM::SRSDB_UPD);
+ break;
+ case ARM::STMIA:
+ Inst.setOpcode(ARM::SRSIA);
+ break;
+ case ARM::STMIA_UPD:
+ Inst.setOpcode(ARM::SRSIA_UPD);
+ break;
+ case ARM::STMIB:
+ Inst.setOpcode(ARM::SRSIB);
+ break;
+ case ARM::STMIB_UPD:
+ Inst.setOpcode(ARM::SRSIB_UPD);
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ // For stores (which become SRS's, the only operand is the mode.
+ if (fieldFromInstruction(Insn, 20, 1) == 0) {
+ // Check SRS encoding constraints
+ if (!(fieldFromInstruction(Insn, 22, 1) == 1 &&
+ fieldFromInstruction(Insn, 20, 1) == 0))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(
+ MCOperand::createImm(fieldFromInstruction(Insn, 0, 4)));
+ return S;
+ }
+
+ return DecodeRFEInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail; // Tied
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeRegListOperand(Inst, reglist, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction(Insn, 18, 2);
+ unsigned M = fieldFromInstruction(Insn, 17, 1);
+ unsigned iflags = fieldFromInstruction(Insn, 6, 3);
+ unsigned mode = fieldFromInstruction(Insn, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // This decoder is called from multiple location that do not check
+ // the full encoding is valid before they do.
+ if (fieldFromInstruction(Insn, 5, 1) != 0 ||
+ fieldFromInstruction(Insn, 16, 1) != 0 ||
+ fieldFromInstruction(Insn, 20, 8) != 0x10)
+ return MCDisassembler::Fail;
+
+ // imod == '01' --> UNPREDICTABLE
+ // NOTE: Even though this is technically UNPREDICTABLE, we choose to
+ // return failure here. The '01' imod value is unprintable, so there's
+ // nothing useful we could do even if we returned UNPREDICTABLE.
+
+ if (imod == 1) return MCDisassembler::Fail;
+
+ if (imod && M) {
+ Inst.setOpcode(ARM::CPS3p);
+ Inst.addOperand(MCOperand::createImm(imod));
+ Inst.addOperand(MCOperand::createImm(iflags));
+ Inst.addOperand(MCOperand::createImm(mode));
+ } else if (imod && !M) {
+ Inst.setOpcode(ARM::CPS2p);
+ Inst.addOperand(MCOperand::createImm(imod));
+ Inst.addOperand(MCOperand::createImm(iflags));
+ if (mode) S = MCDisassembler::SoftFail;
+ } else if (!imod && M) {
+ Inst.setOpcode(ARM::CPS1p);
+ Inst.addOperand(MCOperand::createImm(mode));
+ if (iflags) S = MCDisassembler::SoftFail;
+ } else {
+ // imod == '00' && M == '0' --> UNPREDICTABLE
+ Inst.setOpcode(ARM::CPS1p);
+ Inst.addOperand(MCOperand::createImm(mode));
+ S = MCDisassembler::SoftFail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction(Insn, 9, 2);
+ unsigned M = fieldFromInstruction(Insn, 8, 1);
+ unsigned iflags = fieldFromInstruction(Insn, 5, 3);
+ unsigned mode = fieldFromInstruction(Insn, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // imod == '01' --> UNPREDICTABLE
+ // NOTE: Even though this is technically UNPREDICTABLE, we choose to
+ // return failure here. The '01' imod value is unprintable, so there's
+ // nothing useful we could do even if we returned UNPREDICTABLE.
+
+ if (imod == 1) return MCDisassembler::Fail;
+
+ if (imod && M) {
+ Inst.setOpcode(ARM::t2CPS3p);
+ Inst.addOperand(MCOperand::createImm(imod));
+ Inst.addOperand(MCOperand::createImm(iflags));
+ Inst.addOperand(MCOperand::createImm(mode));
+ } else if (imod && !M) {
+ Inst.setOpcode(ARM::t2CPS2p);
+ Inst.addOperand(MCOperand::createImm(imod));
+ Inst.addOperand(MCOperand::createImm(iflags));
+ if (mode) S = MCDisassembler::SoftFail;
+ } else if (!imod && M) {
+ Inst.setOpcode(ARM::t2CPS1p);
+ Inst.addOperand(MCOperand::createImm(mode));
+ if (iflags) S = MCDisassembler::SoftFail;
+ } else {
+ // imod == '00' && M == '0' --> this is a HINT instruction
+ int imm = fieldFromInstruction(Insn, 0, 8);
+ // HINT are defined only for immediate in [0..4]
+ if(imm > 4) return MCDisassembler::Fail;
+ Inst.setOpcode(ARM::t2HINT);
+ Inst.addOperand(MCOperand::createImm(imm));
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 8, 4);
+ unsigned imm = 0;
+
+ imm |= (fieldFromInstruction(Insn, 0, 8) << 0);
+ imm |= (fieldFromInstruction(Insn, 12, 3) << 8);
+ imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
+ imm |= (fieldFromInstruction(Insn, 26, 1) << 11);
+
+ if (Inst.getOpcode() == ARM::t2MOVTi16)
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned imm = 0;
+
+ imm |= (fieldFromInstruction(Insn, 0, 12) << 0);
+ imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
+
+ if (Inst.getOpcode() == ARM::MOVTi16)
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 8, 4);
+ unsigned Ra = fieldFromInstruction(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Ra, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+
+ if (Pred == 0xF)
+ return DecodeSETPANInstruction(Inst, Insn, Address, Decoder);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, Pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Imm = fieldFromInstruction(Insn, 9, 1);
+
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ const FeatureBitset &FeatureBits = Dis->getSubtargetInfo().getFeatureBits();
+
+ if (!FeatureBits[ARM::HasV8_1aOps] ||
+ !FeatureBits[ARM::HasV8Ops])
+ return MCDisassembler::Fail;
+
+ // Decoder can be called from DecodeTST, which does not check the full
+ // encoding is valid.
+ if (fieldFromInstruction(Insn, 20,12) != 0xf11 ||
+ fieldFromInstruction(Insn, 4,4) != 0)
+ return MCDisassembler::Fail;
+ if (fieldFromInstruction(Insn, 10,10) != 0 ||
+ fieldFromInstruction(Insn, 0,4) != 0)
+ S = MCDisassembler::SoftFail;
+
+ Inst.setOpcode(ARM::SETPAN);
+ Inst.addOperand(MCOperand::createImm(Imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned add = fieldFromInstruction(Val, 12, 1);
+ unsigned imm = fieldFromInstruction(Val, 0, 12);
+ unsigned Rn = fieldFromInstruction(Val, 13, 4);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!add) imm *= -1;
+ if (imm == 0 && !add) imm = INT32_MIN;
+ Inst.addOperand(MCOperand::createImm(imm));
+ if (Rn == 15)
+ tryAddingPcLoadReferenceComment(Address, Address + imm + 8, Decoder);
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ unsigned U = fieldFromInstruction(Val, 8, 1);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (U)
+ Inst.addOperand(MCOperand::createImm(ARM_AM::getAM5Opc(ARM_AM::add, imm)));
+ else
+ Inst.addOperand(MCOperand::createImm(ARM_AM::getAM5Opc(ARM_AM::sub, imm)));
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ return DecodeGPRRegisterClass(Inst, Val, Address, Decoder);
+}
+
+static DecodeStatus
+DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus Status = MCDisassembler::Success;
+
+ // Note the J1 and J2 values are from the encoded instruction. So here
+ // change them to I1 and I2 values via as documented:
+ // I1 = NOT(J1 EOR S);
+ // I2 = NOT(J2 EOR S);
+ // and build the imm32 with one trailing zero as documented:
+ // imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32);
+ unsigned S = fieldFromInstruction(Insn, 26, 1);
+ unsigned J1 = fieldFromInstruction(Insn, 13, 1);
+ unsigned J2 = fieldFromInstruction(Insn, 11, 1);
+ unsigned I1 = !(J1 ^ S);
+ unsigned I2 = !(J2 ^ S);
+ unsigned imm10 = fieldFromInstruction(Insn, 16, 10);
+ unsigned imm11 = fieldFromInstruction(Insn, 0, 11);
+ unsigned tmp = (S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11;
+ int imm32 = SignExtend32<25>(tmp << 1);
+ if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(imm32));
+
+ return Status;
+}
+
+static DecodeStatus
+DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 24) << 2;
+
+ if (pred == 0xF) {
+ Inst.setOpcode(ARM::BLXi);
+ imm |= fieldFromInstruction(Insn, 24, 1) << 1;
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(SignExtend32<26>(imm)));
+ return S;
+ }
+
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(SignExtend32<26>(imm)));
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+
+static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned align = fieldFromInstruction(Val, 4, 2);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!align)
+ Inst.addOperand(MCOperand::createImm(0));
+ else
+ Inst.addOperand(MCOperand::createImm(4 << align));
+
+ return S;
+}
+
+static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+
+ // First output register
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8:
+ case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register:
+ case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register:
+ case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8:
+ case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register:
+ case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2b16:
+ case ARM::VLD2b32:
+ case ARM::VLD2b8:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b8wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ // Second output register
+ switch (Inst.getOpcode()) {
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD3q8:
+ case ARM::VLD3q16:
+ case ARM::VLD3q32:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ default:
+ break;
+ }
+
+ // Third output register
+ switch(Inst.getOpcode()) {
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD3q8:
+ case ARM::VLD3q16:
+ case ARM::VLD3q32:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Fourth output register
+ switch (Inst.getOpcode()) {
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Writeback operand
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1d8wb_fixed:
+ case ARM::VLD1d16wb_fixed:
+ case ARM::VLD1d32wb_fixed:
+ case ARM::VLD1d64wb_fixed:
+ case ARM::VLD1d8wb_register:
+ case ARM::VLD1d16wb_register:
+ case ARM::VLD1d32wb_register:
+ case ARM::VLD1d64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ case ARM::VLD1d8Twb_fixed:
+ case ARM::VLD1d8Twb_register:
+ case ARM::VLD1d16Twb_fixed:
+ case ARM::VLD1d16Twb_register:
+ case ARM::VLD1d32Twb_fixed:
+ case ARM::VLD1d32Twb_register:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d64Twb_register:
+ case ARM::VLD1d8Qwb_fixed:
+ case ARM::VLD1d8Qwb_register:
+ case ARM::VLD1d16Qwb_fixed:
+ case ARM::VLD1d16Qwb_register:
+ case ARM::VLD1d32Qwb_fixed:
+ case ARM::VLD1d32Qwb_register:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d64Qwb_register:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b8wb_register:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_register:
+ Inst.addOperand(MCOperand::createImm(0));
+ break;
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // AddrMode6 Base (register+alignment)
+ if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // AddrMode6 Offset (register)
+ switch (Inst.getOpcode()) {
+ default:
+ // The below have been updated to have explicit am6offset split
+ // between fixed and register offset. For those instructions not
+ // yet updated, we need to add an additional reg0 operand for the
+ // fixed variant.
+ //
+ // The fixed offset encodes as Rm == 0xd, so we check for that.
+ if (Rm == 0xd) {
+ Inst.addOperand(MCOperand::createReg(0));
+ break;
+ }
+ // Fall through to handle the register offset variant.
+ case ARM::VLD1d8wb_fixed:
+ case ARM::VLD1d16wb_fixed:
+ case ARM::VLD1d32wb_fixed:
+ case ARM::VLD1d64wb_fixed:
+ case ARM::VLD1d8Twb_fixed:
+ case ARM::VLD1d16Twb_fixed:
+ case ARM::VLD1d32Twb_fixed:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d8Qwb_fixed:
+ case ARM::VLD1d16Qwb_fixed:
+ case ARM::VLD1d32Qwb_fixed:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d8wb_register:
+ case ARM::VLD1d16wb_register:
+ case ARM::VLD1d32wb_register:
+ case ARM::VLD1d64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned type = fieldFromInstruction(Insn, 8, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 2);
+ if (type == 6 && (align & 2)) return MCDisassembler::Fail;
+ if (type == 7 && (align & 2)) return MCDisassembler::Fail;
+ if (type == 10 && align == 3) return MCDisassembler::Fail;
+
+ unsigned load = fieldFromInstruction(Insn, 21, 1);
+ return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder)
+ : DecodeVSTInstruction(Inst, Insn, Address, Decoder);
+}
+
+static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ if (size == 3) return MCDisassembler::Fail;
+
+ unsigned type = fieldFromInstruction(Insn, 8, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 2);
+ if (type == 8 && align == 3) return MCDisassembler::Fail;
+ if (type == 9 && align == 3) return MCDisassembler::Fail;
+
+ unsigned load = fieldFromInstruction(Insn, 21, 1);
+ return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder)
+ : DecodeVSTInstruction(Inst, Insn, Address, Decoder);
+}
+
+static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ if (size == 3) return MCDisassembler::Fail;
+
+ unsigned align = fieldFromInstruction(Insn, 4, 2);
+ if (align & 2) return MCDisassembler::Fail;
+
+ unsigned load = fieldFromInstruction(Insn, 21, 1);
+ return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder)
+ : DecodeVSTInstruction(Inst, Insn, Address, Decoder);
+}
+
+static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ if (size == 3) return MCDisassembler::Fail;
+
+ unsigned load = fieldFromInstruction(Insn, 21, 1);
+ return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder)
+ : DecodeVSTInstruction(Inst, Insn, Address, Decoder);
+}
+
+static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+
+ // Writeback Operand
+ switch (Inst.getOpcode()) {
+ case ARM::VST1d8wb_fixed:
+ case ARM::VST1d16wb_fixed:
+ case ARM::VST1d32wb_fixed:
+ case ARM::VST1d64wb_fixed:
+ case ARM::VST1d8wb_register:
+ case ARM::VST1d16wb_register:
+ case ARM::VST1d32wb_register:
+ case ARM::VST1d64wb_register:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1q8wb_register:
+ case ARM::VST1q16wb_register:
+ case ARM::VST1q32wb_register:
+ case ARM::VST1q64wb_register:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Twb_register:
+ case ARM::VST1d16Twb_register:
+ case ARM::VST1d32Twb_register:
+ case ARM::VST1d64Twb_register:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST1d8Qwb_register:
+ case ARM::VST1d16Qwb_register:
+ case ARM::VST1d32Qwb_register:
+ case ARM::VST1d64Qwb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d8wb_register:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2q8wb_register:
+ case ARM::VST2q16wb_register:
+ case ARM::VST2q32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b8wb_register:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_register:
+ if (Rm == 0xF)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(0));
+ break;
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // AddrMode6 Base (register+alignment)
+ if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // AddrMode6 Offset (register)
+ switch (Inst.getOpcode()) {
+ default:
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::createReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ break;
+ case ARM::VST1d8wb_fixed:
+ case ARM::VST1d16wb_fixed:
+ case ARM::VST1d32wb_fixed:
+ case ARM::VST1d64wb_fixed:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ break;
+ }
+
+
+ // First input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST1q16:
+ case ARM::VST1q32:
+ case ARM::VST1q64:
+ case ARM::VST1q8:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q16wb_register:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q32wb_register:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1q64wb_register:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q8wb_register:
+ case ARM::VST2d16:
+ case ARM::VST2d32:
+ case ARM::VST2d8:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST2b16:
+ case ARM::VST2b32:
+ case ARM::VST2b8:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b8wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ // Second input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST3d8:
+ case ARM::VST3d16:
+ case ARM::VST3d32:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST3q8:
+ case ARM::VST3q16:
+ case ARM::VST3q32:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Third input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST3d8:
+ case ARM::VST3d16:
+ case ARM::VST3d32:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST3q8:
+ case ARM::VST3q16:
+ case ARM::VST3q32:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Fourth input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 1);
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+
+ if (size == 0 && align == 1)
+ return MCDisassembler::Fail;
+ align *= (1 << size);
+
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ }
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(align));
+
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 1);
+ unsigned size = 1 << fieldFromInstruction(Insn, 6, 2);
+ align *= 2*size;
+
+ switch (Inst.getOpcode()) {
+ case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register:
+ case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2DUPd16x2: case ARM::VLD2DUPd32x2: case ARM::VLD2DUPd8x2:
+ case ARM::VLD2DUPd16x2wb_fixed: case ARM::VLD2DUPd16x2wb_register:
+ case ARM::VLD2DUPd32x2wb_fixed: case ARM::VLD2DUPd32x2wb_register:
+ case ARM::VLD2DUPd8x2wb_fixed: case ARM::VLD2DUPd8x2wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ }
+
+ if (Rm != 0xF)
+ Inst.addOperand(MCOperand::createImm(0));
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(align));
+
+ if (Rm != 0xD && Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(0));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::createReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
+ unsigned align = fieldFromInstruction(Insn, 4, 1);
+
+ if (size == 0x3) {
+ if (align == 0)
+ return MCDisassembler::Fail;
+ align = 16;
+ } else {
+ if (size == 2) {
+ align *= 8;
+ } else {
+ size = 1 << size;
+ align *= 4*size;
+ }
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(align));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::createReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus
+DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned imm = fieldFromInstruction(Insn, 0, 4);
+ imm |= fieldFromInstruction(Insn, 16, 3) << 4;
+ imm |= fieldFromInstruction(Insn, 24, 1) << 7;
+ imm |= fieldFromInstruction(Insn, 8, 4) << 8;
+ imm |= fieldFromInstruction(Insn, 5, 1) << 12;
+ unsigned Q = fieldFromInstruction(Insn, 6, 1);
+
+ if (Q) {
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ switch (Inst.getOpcode()) {
+ case ARM::VORRiv4i16:
+ case ARM::VORRiv2i32:
+ case ARM::VBICiv4i16:
+ case ARM::VBICiv2i32:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VORRiv8i16:
+ case ARM::VORRiv4i32:
+ case ARM::VBICiv8i16:
+ case ARM::VBICiv4i32:
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 18, 2);
+
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(8 << size));
+
+ return S;
+}
+
+static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::createImm(8 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::createImm(16 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::createImm(32 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::createImm(64 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ Rn |= fieldFromInstruction(Insn, 7, 1) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+ unsigned op = fieldFromInstruction(Insn, 6, 1);
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (op) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail; // Writeback
+ }
+
+ switch (Inst.getOpcode()) {
+ case ARM::VTBL2:
+ case ARM::VTBX2:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned dst = fieldFromInstruction(Insn, 8, 3);
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ switch(Inst.getOpcode()) {
+ default:
+ return MCDisassembler::Fail;
+ case ARM::tADR:
+ break; // tADR does not explicitly represent the PC as an operand.
+ case ARM::tADDrSPi:
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
+ break;
+ }
+
+ Inst.addOperand(MCOperand::createImm(imm));
+ return S;
+}
+
+static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(SignExtend32<12>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<21>(Val) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(SignExtend32<21>(Val)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(Val << 1));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 0, 3);
+ unsigned Rm = fieldFromInstruction(Val, 3, 3);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 0, 3);
+ unsigned imm = fieldFromInstruction(Val, 3, 5);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ unsigned imm = Val << 2;
+
+ Inst.addOperand(MCOperand::createImm(imm));
+ tryAddingPcLoadReferenceComment(Address, (Address & ~2u) + imm + 4, Decoder);
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
+ Inst.addOperand(MCOperand::createImm(Val));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 6, 4);
+ unsigned Rm = fieldFromInstruction(Val, 2, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 2);
+
+ // Thumb stores cannot use PC as dest register.
+ switch (Inst.getOpcode()) {
+ case ARM::t2STRHs:
+ case ARM::t2STRBs:
+ case ARM::t2STRs:
+ if (Rn == 15)
+ return MCDisassembler::Fail;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ bool hasMP = featureBits[ARM::FeatureMP];
+ bool hasV7Ops = featureBits[ARM::HasV7Ops];
+
+ if (Rn == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRBs:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRHs:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSHs:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ case ARM::t2LDRSBs:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2LDRs:
+ Inst.setOpcode(ARM::t2LDRpci);
+ break;
+ case ARM::t2PLDs:
+ Inst.setOpcode(ARM::t2PLDpci);
+ break;
+ case ARM::t2PLIs:
+ Inst.setOpcode(ARM::t2PLIpci);
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ return DecodeT2LoadLabel(Inst, Insn, Address, Decoder);
+ }
+
+ if (Rt == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRSHs:
+ return MCDisassembler::Fail;
+ case ARM::t2LDRHs:
+ Inst.setOpcode(ARM::t2PLDWs);
+ break;
+ case ARM::t2LDRSBs:
+ Inst.setOpcode(ARM::t2PLIs);
+ default:
+ break;
+ }
+ }
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2PLDs:
+ break;
+ case ARM::t2PLIs:
+ if (!hasV7Ops)
+ return MCDisassembler::Fail;
+ break;
+ case ARM::t2PLDWs:
+ if (!hasV7Ops || !hasMP)
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ unsigned addrmode = fieldFromInstruction(Insn, 4, 2);
+ addrmode |= fieldFromInstruction(Insn, 0, 4) << 2;
+ addrmode |= fieldFromInstruction(Insn, 16, 4) << 6;
+ if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned U = fieldFromInstruction(Insn, 9, 1);
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
+ imm |= (U << 8);
+ imm |= (Rn << 9);
+ unsigned add = fieldFromInstruction(Insn, 9, 1);
+
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ bool hasMP = featureBits[ARM::FeatureMP];
+ bool hasV7Ops = featureBits[ARM::HasV7Ops];
+
+ if (Rn == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRi8:
+ Inst.setOpcode(ARM::t2LDRpci);
+ break;
+ case ARM::t2LDRBi8:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRSBi8:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2LDRHi8:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSHi8:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ case ARM::t2PLDi8:
+ Inst.setOpcode(ARM::t2PLDpci);
+ break;
+ case ARM::t2PLIi8:
+ Inst.setOpcode(ARM::t2PLIpci);
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ return DecodeT2LoadLabel(Inst, Insn, Address, Decoder);
+ }
+
+ if (Rt == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRSHi8:
+ return MCDisassembler::Fail;
+ case ARM::t2LDRHi8:
+ if (!add)
+ Inst.setOpcode(ARM::t2PLDWi8);
+ break;
+ case ARM::t2LDRSBi8:
+ Inst.setOpcode(ARM::t2PLIi8);
+ break;
+ default:
+ break;
+ }
+ }
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2PLDi8:
+ break;
+ case ARM::t2PLIi8:
+ if (!hasV7Ops)
+ return MCDisassembler::Fail;
+ break;
+ case ARM::t2PLDWi8:
+ if (!hasV7Ops || !hasMP)
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= (Rn << 13);
+
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ bool hasMP = featureBits[ARM::FeatureMP];
+ bool hasV7Ops = featureBits[ARM::HasV7Ops];
+
+ if (Rn == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRi12:
+ Inst.setOpcode(ARM::t2LDRpci);
+ break;
+ case ARM::t2LDRHi12:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSHi12:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ case ARM::t2LDRBi12:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRSBi12:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2PLDi12:
+ Inst.setOpcode(ARM::t2PLDpci);
+ break;
+ case ARM::t2PLIi12:
+ Inst.setOpcode(ARM::t2PLIpci);
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ return DecodeT2LoadLabel(Inst, Insn, Address, Decoder);
+ }
+
+ if (Rt == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRSHi12:
+ return MCDisassembler::Fail;
+ case ARM::t2LDRHi12:
+ Inst.setOpcode(ARM::t2PLDWi12);
+ break;
+ case ARM::t2LDRSBi12:
+ Inst.setOpcode(ARM::t2PLIi12);
+ break;
+ default:
+ break;
+ }
+ }
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2PLDi12:
+ break;
+ case ARM::t2PLIi12:
+ if (!hasV7Ops)
+ return MCDisassembler::Fail;
+ break;
+ case ARM::t2PLDWi12:
+ if (!hasV7Ops || !hasMP)
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeT2AddrModeImm12(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
+ imm |= (Rn << 9);
+
+ if (Rn == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRT:
+ Inst.setOpcode(ARM::t2LDRpci);
+ break;
+ case ARM::t2LDRBT:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRHT:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSBT:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2LDRSHT:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ return DecodeT2LoadLabel(Inst, Insn, Address, Decoder);
+ }
+
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
+ int imm = fieldFromInstruction(Insn, 0, 12);
+
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ bool hasV7Ops = featureBits[ARM::HasV7Ops];
+
+ if (Rt == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRBpci:
+ case ARM::t2LDRHpci:
+ Inst.setOpcode(ARM::t2PLDpci);
+ break;
+ case ARM::t2LDRSBpci:
+ Inst.setOpcode(ARM::t2PLIpci);
+ break;
+ case ARM::t2LDRSHpci:
+ return MCDisassembler::Fail;
+ default:
+ break;
+ }
+ }
+
+ switch(Inst.getOpcode()) {
+ case ARM::t2PLDpci:
+ break;
+ case ARM::t2PLIpci:
+ if (!hasV7Ops)
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!U) {
+ // Special case for #-0.
+ if (imm == 0)
+ imm = INT32_MIN;
+ else
+ imm = -imm;
+ }
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val == 0)
+ Inst.addOperand(MCOperand::createImm(INT32_MIN));
+ else {
+ int imm = Val & 0xFF;
+
+ if (!(Val & 0x100)) imm *= -1;
+ Inst.addOperand(MCOperand::createImm(imm * 4));
+ }
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 9);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm8S4(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 8, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ int imm = Val & 0xFF;
+ if (Val == 0)
+ imm = INT32_MIN;
+ else if (!(Val & 0x100))
+ imm *= -1;
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 9);
+
+ // Thumb stores cannot use PC as dest register.
+ switch (Inst.getOpcode()) {
+ case ARM::t2STRT:
+ case ARM::t2STRBT:
+ case ARM::t2STRHT:
+ case ARM::t2STRi8:
+ case ARM::t2STRHi8:
+ case ARM::t2STRBi8:
+ if (Rn == 15)
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Some instructions always use an additive offset.
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRT:
+ case ARM::t2LDRBT:
+ case ARM::t2LDRHT:
+ case ARM::t2LDRSBT:
+ case ARM::t2LDRSHT:
+ case ARM::t2STRT:
+ case ARM::t2STRBT:
+ case ARM::t2STRHT:
+ imm |= 0x100;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm8(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction(Insn, 0, 8);
+ addr |= fieldFromInstruction(Insn, 9, 1) << 8;
+ addr |= Rn << 9;
+ unsigned load = fieldFromInstruction(Insn, 20, 1);
+
+ if (Rn == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDR_PRE:
+ case ARM::t2LDR_POST:
+ Inst.setOpcode(ARM::t2LDRpci);
+ break;
+ case ARM::t2LDRB_PRE:
+ case ARM::t2LDRB_POST:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRH_PRE:
+ case ARM::t2LDRH_POST:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSB_PRE:
+ case ARM::t2LDRSB_POST:
+ if (Rt == 15)
+ Inst.setOpcode(ARM::t2PLIpci);
+ else
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2LDRSH_PRE:
+ case ARM::t2LDRSH_POST:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ return DecodeT2LoadLabel(Inst, Insn, Address, Decoder);
+ }
+
+ if (!load) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (load) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeT2AddrModeImm8(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 13, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 12);
+
+ // Thumb stores cannot use PC as dest register.
+ switch (Inst.getOpcode()) {
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ if (Rn == 15)
+ return MCDisassembler::Fail;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imm = fieldFromInstruction(Insn, 0, 7);
+
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (Inst.getOpcode() == ARM::tADDrSP) {
+ unsigned Rdm = fieldFromInstruction(Insn, 0, 3);
+ Rdm |= fieldFromInstruction(Insn, 7, 1) << 3;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else if (Inst.getOpcode() == ARM::tADDspr) {
+ unsigned Rm = fieldFromInstruction(Insn, 3, 4);
+
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2;
+ unsigned flags = fieldFromInstruction(Insn, 0, 3);
+
+ Inst.addOperand(MCOperand::createImm(imod));
+ Inst.addOperand(MCOperand::createImm(flags));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned add = fieldFromInstruction(Insn, 4, 1);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(add));
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ // Val is passed in as S:J1:J2:imm10H:imm10L:'0'
+ // Note only one trailing zero not two. Also the J1 and J2 values are from
+ // the encoded instruction. So here change to I1 and I2 values via:
+ // I1 = NOT(J1 EOR S);
+ // I2 = NOT(J2 EOR S);
+ // and build the imm32 with two trailing zeros as documented:
+ // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:'00', 32);
+ unsigned S = (Val >> 23) & 1;
+ unsigned J1 = (Val >> 22) & 1;
+ unsigned J2 = (Val >> 21) & 1;
+ unsigned I1 = !(J1 ^ S);
+ unsigned I2 = !(J2 ^ S);
+ unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21);
+ int imm32 = SignExtend32<25>(tmp << 1);
+
+ if (!tryAddingSymbolicOperand(Address,
+ (Address & ~2u) + imm32 + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(imm32));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val == 0xA || Val == 0xB)
+ return MCDisassembler::Fail;
+
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ if (featureBits[ARM::HasV8Ops] && !(Val == 14 || Val == 15))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+
+ if (Rn == ARM::SP) S = MCDisassembler::SoftFail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
+static DecodeStatus
+DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction(Insn, 22, 4);
+ if (pred == 0xE || pred == 0xF) {
+ unsigned opc = fieldFromInstruction(Insn, 4, 28);
+ switch (opc) {
+ default:
+ return MCDisassembler::Fail;
+ case 0xf3bf8f4:
+ Inst.setOpcode(ARM::t2DSB);
+ break;
+ case 0xf3bf8f5:
+ Inst.setOpcode(ARM::t2DMB);
+ break;
+ case 0xf3bf8f6:
+ Inst.setOpcode(ARM::t2ISB);
+ break;
+ }
+
+ unsigned imm = fieldFromInstruction(Insn, 0, 4);
+ return DecodeMemBarrierOption(Inst, imm, Address, Decoder);
+ }
+
+ unsigned brtarget = fieldFromInstruction(Insn, 0, 11) << 1;
+ brtarget |= fieldFromInstruction(Insn, 11, 1) << 19;
+ brtarget |= fieldFromInstruction(Insn, 13, 1) << 18;
+ brtarget |= fieldFromInstruction(Insn, 16, 6) << 12;
+ brtarget |= fieldFromInstruction(Insn, 26, 1) << 20;
+
+ if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+// Decode a shifted immediate operand. These basically consist
+// of an 8-bit value, and a 4-bit directive that specifies either
+// a splat operation or a rotation.
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ unsigned ctrl = fieldFromInstruction(Val, 10, 2);
+ if (ctrl == 0) {
+ unsigned byte = fieldFromInstruction(Val, 8, 2);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
+ switch (byte) {
+ case 0:
+ Inst.addOperand(MCOperand::createImm(imm));
+ break;
+ case 1:
+ Inst.addOperand(MCOperand::createImm((imm << 16) | imm));
+ break;
+ case 2:
+ Inst.addOperand(MCOperand::createImm((imm << 24) | (imm << 8)));
+ break;
+ case 3:
+ Inst.addOperand(MCOperand::createImm((imm << 24) | (imm << 16) |
+ (imm << 8) | imm));
+ break;
+ }
+ } else {
+ unsigned unrot = fieldFromInstruction(Val, 0, 7) | 0x80;
+ unsigned rot = fieldFromInstruction(Val, 7, 5);
+ unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31));
+ Inst.addOperand(MCOperand::createImm(imm));
+ }
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder){
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(SignExtend32<9>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder){
+ // Val is passed in as S:J1:J2:imm10:imm11
+ // Note no trailing zero after imm11. Also the J1 and J2 values are from
+ // the encoded instruction. So here change to I1 and I2 values via:
+ // I1 = NOT(J1 EOR S);
+ // I2 = NOT(J2 EOR S);
+ // and build the imm32 with one trailing zero as documented:
+ // imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32);
+ unsigned S = (Val >> 23) & 1;
+ unsigned J1 = (Val >> 22) & 1;
+ unsigned J2 = (Val >> 21) & 1;
+ unsigned I1 = !(J1 ^ S);
+ unsigned I2 = !(J2 ^ S);
+ unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21);
+ int imm32 = SignExtend32<25>(tmp << 1);
+
+ if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::createImm(imm32));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val & ~0xf)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val & ~0xf)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ const FeatureBitset &FeatureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ if (FeatureBits[ARM::FeatureMClass]) {
+ unsigned ValLow = Val & 0xff;
+
+ // Validate the SYSm value first.
+ switch (ValLow) {
+ case 0: // apsr
+ case 1: // iapsr
+ case 2: // eapsr
+ case 3: // xpsr
+ case 5: // ipsr
+ case 6: // epsr
+ case 7: // iepsr
+ case 8: // msp
+ case 9: // psp
+ case 16: // primask
+ case 20: // control
+ break;
+ case 17: // basepri
+ case 18: // basepri_max
+ case 19: // faultmask
+ if (!(FeatureBits[ARM::HasV7Ops]))
+ // Values basepri, basepri_max and faultmask are only valid for v7m.
+ return MCDisassembler::Fail;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ if (Inst.getOpcode() == ARM::t2MSR_M) {
+ unsigned Mask = fieldFromInstruction(Val, 10, 2);
+ if (!(FeatureBits[ARM::HasV7Ops])) {
+ // The ARMv6-M MSR bits {11-10} can be only 0b10, other values are
+ // unpredictable.
+ if (Mask != 2)
+ S = MCDisassembler::SoftFail;
+ }
+ else {
+ // The ARMv7-M architecture stores an additional 2-bit mask value in
+ // MSR bits {11-10}. The mask is used only with apsr, iapsr, eapsr and
+ // xpsr, it has to be 0b10 in other cases. Bit mask{1} indicates if
+ // the NZCVQ bits should be moved by the instruction. Bit mask{0}
+ // indicates the move for the GE{3:0} bits, the mask{0} bit can be set
+ // only if the processor includes the DSP extension.
+ if (Mask == 0 || (Mask != 2 && ValLow > 3) ||
+ (!(FeatureBits[ARM::FeatureDSP]) && (Mask & 1)))
+ S = MCDisassembler::SoftFail;
+ }
+ }
+ } else {
+ // A/R class
+ if (Val == 0)
+ return MCDisassembler::Fail;
+ }
+ Inst.addOperand(MCOperand::createImm(Val));
+ return S;
+}
+
+static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+
+ unsigned R = fieldFromInstruction(Val, 5, 1);
+ unsigned SysM = fieldFromInstruction(Val, 0, 5);
+
+ // The table of encodings for these banked registers comes from B9.2.3 of the
+ // ARM ARM. There are patterns, but nothing regular enough to make this logic
+ // neater. So by fiat, these values are UNPREDICTABLE:
+ if (!R) {
+ if (SysM == 0x7 || SysM == 0xf || SysM == 0x18 || SysM == 0x19 ||
+ SysM == 0x1a || SysM == 0x1b)
+ return MCDisassembler::SoftFail;
+ } else {
+ if (SysM != 0xe && SysM != 0x10 && SysM != 0x12 && SysM != 0x14 &&
+ SysM != 0x16 && SysM != 0x1c && SysM != 0x1e)
+ return MCDisassembler::SoftFail;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (Rn == 0xF)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRPairRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder){
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (Rn == 0xF || Rd == Rn || Rd == Rt || Rd == Rt+1)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRPairRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+ if (Rm == 0xF) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+
+static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction(Insn, 6, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 7, 1);
+
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0 :
+ align = 0; break;
+ case 3:
+ align = 4; break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::createReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction(Insn, 6, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 7, 1);
+
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ align = 4; break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::createReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(index));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ index = fieldFromInstruction(Insn, 5, 3);
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 2;
+ break;
+ case 1:
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 4;
+ if (fieldFromInstruction(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 4, 1) != 0)
+ align = 8;
+ if (fieldFromInstruction(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::createReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ index = fieldFromInstruction(Insn, 5, 3);
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 2;
+ break;
+ case 1:
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 4;
+ if (fieldFromInstruction(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 4, 1) != 0)
+ align = 8;
+ if (fieldFromInstruction(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::createReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(index));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction(Insn, 4, 2))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::createReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction(Insn, 4, 2))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::createReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(index));
+
+ return S;
+}
+
+
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 4;
+ index = fieldFromInstruction(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 8;
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ return MCDisassembler::Fail;
+ default:
+ align = 4 << fieldFromInstruction(Insn, 4, 2); break;
+ }
+
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::createReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 4;
+ index = fieldFromInstruction(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction(Insn, 4, 1))
+ align = 8;
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ return MCDisassembler::Fail;
+ default:
+ align = 4 << fieldFromInstruction(Insn, 4, 2); break;
+ }
+
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::createReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 5, 1);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ Rm |= fieldFromInstruction(Insn, 0, 4) << 1;
+
+ if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 5, 1);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ Rm |= fieldFromInstruction(Insn, 0, 4) << 1;
+
+ if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned pred = fieldFromInstruction(Insn, 4, 4);
+ unsigned mask = fieldFromInstruction(Insn, 0, 4);
+
+ if (pred == 0xF) {
+ pred = 0xE;
+ S = MCDisassembler::SoftFail;
+ }
+
+ if (mask == 0x0)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(pred));
+ Inst.addOperand(MCOperand::createImm(mask));
+ return S;
+}
+
+static DecodeStatus
+DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction(Insn, 0, 8);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
+ bool writeback = (W == 1) | (P == 0);
+
+ addr |= (U << 8) | (Rn << 9);
+
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ Check(S, MCDisassembler::SoftFail);
+ if (Rt == Rt2)
+ Check(S, MCDisassembler::SoftFail);
+
+ // Rt
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt2
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Writeback operand
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // addr
+ if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus
+DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction(Insn, 0, 8);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
+ bool writeback = (W == 1) | (P == 0);
+
+ addr |= (U << 8) | (Rn << 9);
+
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ Check(S, MCDisassembler::SoftFail);
+
+ // Writeback operand
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt2
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // addr
+ if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned sign1 = fieldFromInstruction(Insn, 21, 1);
+ unsigned sign2 = fieldFromInstruction(Insn, 23, 1);
+ if (sign1 != sign2) return MCDisassembler::Fail;
+
+ unsigned Val = fieldFromInstruction(Insn, 0, 8);
+ Val |= fieldFromInstruction(Insn, 12, 3) << 8;
+ Val |= fieldFromInstruction(Insn, 26, 1) << 11;
+ Val |= sign1 << 12;
+ Inst.addOperand(MCOperand::createImm(SignExtend32<13>(Val)));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ // Shift of "asr #32" is not allowed in Thumb2 mode.
+ if (Val == 0x20) S = MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(Val));
+ return S;
+}
+
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (Rt == Rn || Rn == Rt2)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+ bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
+
+ unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction(Insn, 8, 4);
+ unsigned op = fieldFromInstruction(Insn, 5, 1);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // If the top 3 bits of imm are clear, this is a VMOV (immediate)
+ if (!(imm & 0x38)) {
+ if (cmode == 0xF) {
+ if (op == 1) return MCDisassembler::Fail;
+ Inst.setOpcode(ARM::VMOVv2f32);
+ }
+ if (hasFullFP16) {
+ if (cmode == 0xE) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMOVv1i64);
+ } else {
+ Inst.setOpcode(ARM::VMOVv8i8);
+ }
+ }
+ if (cmode == 0xD) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv2i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv2i32);
+ }
+ }
+ if (cmode == 0xC) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv2i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv2i32);
+ }
+ }
+ }
+ return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!(imm & 0x20)) return MCDisassembler::Fail;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(64 - imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+ bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
+
+ unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction(Insn, 8, 4);
+ unsigned op = fieldFromInstruction(Insn, 5, 1);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // If the top 3 bits of imm are clear, this is a VMOV (immediate)
+ if (!(imm & 0x38)) {
+ if (cmode == 0xF) {
+ if (op == 1) return MCDisassembler::Fail;
+ Inst.setOpcode(ARM::VMOVv4f32);
+ }
+ if (hasFullFP16) {
+ if (cmode == 0xE) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMOVv2i64);
+ } else {
+ Inst.setOpcode(ARM::VMOVv16i8);
+ }
+ }
+ if (cmode == 0xD) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv4i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv4i32);
+ }
+ }
+ if (cmode == 0xC) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv4i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv4i32);
+ }
+ }
+ }
+ return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!(imm & 0x20)) return MCDisassembler::Fail;
+
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Vm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(64 - imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 16, 4);
+ unsigned Rt = fieldFromInstruction(Val, 12, 4);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ Rm |= (fieldFromInstruction(Val, 23, 1) << 4);
+ unsigned Cond = fieldFromInstruction(Val, 28, 4);
+
+ if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, Cond, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned CRm = fieldFromInstruction(Val, 0, 4);
+ unsigned opc1 = fieldFromInstruction(Val, 4, 4);
+ unsigned cop = fieldFromInstruction(Val, 8, 4);
+ unsigned Rt = fieldFromInstruction(Val, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Val, 16, 4);
+
+ if ((cop & ~0x1) == 0xa)
+ return MCDisassembler::Fail;
+
+ if (Rt == Rt2)
+ S = MCDisassembler::SoftFail;
+
+ Inst.addOperand(MCOperand::createImm(cop));
+ Inst.addOperand(MCOperand::createImm(opc1));
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(CRm));
+
+ return S;
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
new file mode 100644
index 0000000..33fc85a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -0,0 +1,1647 @@
+//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstPrinter.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+#include "ARMGenAsmWriter.inc"
+
+/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
+///
+/// getSORegOffset returns an integer from 0-31, representing '32' as 0.
+static unsigned translateShiftImm(unsigned imm) {
+ // lsr #32 and asr #32 exist, but should be encoded as a 0.
+ assert((imm & ~0x1f) == 0 && "Invalid shift encoding");
+
+ if (imm == 0)
+ return 32;
+ return imm;
+}
+
+/// Prints the shift value with an immediate value.
+static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
+ unsigned ShImm, bool UseMarkup) {
+ if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm))
+ return;
+ O << ", ";
+
+ assert(!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
+ O << getShiftOpcStr(ShOpc);
+
+ if (ShOpc != ARM_AM::rrx) {
+ O << " ";
+ if (UseMarkup)
+ O << "<imm:";
+ O << "#" << translateShiftImm(ShImm);
+ if (UseMarkup)
+ O << ">";
+ }
+}
+
+ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << markup("<reg:") << getRegisterName(RegNo) << markup(">");
+}
+
+void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot, const MCSubtargetInfo &STI) {
+ unsigned Opcode = MI->getOpcode();
+
+ switch (Opcode) {
+
+ // Check for HINT instructions w/ canonical names.
+ case ARM::HINT:
+ case ARM::tHINT:
+ case ARM::t2HINT:
+ switch (MI->getOperand(0).getImm()) {
+ case 0:
+ O << "\tnop";
+ break;
+ case 1:
+ O << "\tyield";
+ break;
+ case 2:
+ O << "\twfe";
+ break;
+ case 3:
+ O << "\twfi";
+ break;
+ case 4:
+ O << "\tsev";
+ break;
+ case 5:
+ if (STI.getFeatureBits()[ARM::HasV8Ops]) {
+ O << "\tsevl";
+ break;
+ } // Fallthrough for non-v8
+ default:
+ // Anything else should just print normally.
+ printInstruction(MI, STI, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ printPredicateOperand(MI, 1, STI, O);
+ if (Opcode == ARM::t2HINT)
+ O << ".w";
+ printAnnotation(O, Annot);
+ return;
+
+ // Check for MOVs and print canonical forms, instead.
+ case ARM::MOVsr: {
+ // FIXME: Thumb variants?
+ const MCOperand &Dst = MI->getOperand(0);
+ const MCOperand &MO1 = MI->getOperand(1);
+ const MCOperand &MO2 = MI->getOperand(2);
+ const MCOperand &MO3 = MI->getOperand(3);
+
+ O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
+ printSBitModifierOperand(MI, 6, STI, O);
+ printPredicateOperand(MI, 4, STI, O);
+
+ O << '\t';
+ printRegName(O, Dst.getReg());
+ O << ", ";
+ printRegName(O, MO1.getReg());
+
+ O << ", ";
+ printRegName(O, MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ case ARM::MOVsi: {
+ // FIXME: Thumb variants?
+ const MCOperand &Dst = MI->getOperand(0);
+ const MCOperand &MO1 = MI->getOperand(1);
+ const MCOperand &MO2 = MI->getOperand(2);
+
+ O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()));
+ printSBitModifierOperand(MI, 5, STI, O);
+ printPredicateOperand(MI, 3, STI, O);
+
+ O << '\t';
+ printRegName(O, Dst.getReg());
+ O << ", ";
+ printRegName(O, MO1.getReg());
+
+ if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) {
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ O << ", " << markup("<imm:") << "#"
+ << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) << markup(">");
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // A8.6.123 PUSH
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
+ // Should only print PUSH if there are at least two registers in the list.
+ O << '\t' << "push";
+ printPredicateOperand(MI, 2, STI, O);
+ if (Opcode == ARM::t2STMDB_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, STI, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
+
+ case ARM::STR_PRE_IMM:
+ if (MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(3).getImm() == -4) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 4, STI, O);
+ O << "\t{";
+ printRegName(O, MI->getOperand(1).getReg());
+ O << "}";
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
+
+ // A8.6.122 POP
+ case ARM::LDMIA_UPD:
+ case ARM::t2LDMIA_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
+ // Should only print POP if there are at least two registers in the list.
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 2, STI, O);
+ if (Opcode == ARM::t2LDMIA_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, STI, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
+
+ case ARM::LDR_POST_IMM:
+ if (MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(4).getImm() == 4) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 5, STI, O);
+ O << "\t{";
+ printRegName(O, MI->getOperand(0).getReg());
+ O << "}";
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
+
+ // A8.6.355 VPUSH
+ case ARM::VSTMSDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpush";
+ printPredicateOperand(MI, 2, STI, O);
+ O << '\t';
+ printRegisterList(MI, 4, STI, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
+
+ // A8.6.354 VPOP
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMDIA_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpop";
+ printPredicateOperand(MI, 2, STI, O);
+ O << '\t';
+ printRegisterList(MI, 4, STI, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
+
+ case ARM::tLDMIA: {
+ bool Writeback = true;
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg)
+ Writeback = false;
+ }
+
+ O << "\tldm";
+
+ printPredicateOperand(MI, 1, STI, O);
+ O << '\t';
+ printRegName(O, BaseReg);
+ if (Writeback)
+ O << "!";
+ O << ", ";
+ printRegisterList(MI, 3, STI, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // Combine 2 GPRs from disassember into a GPRPair to match with instr def.
+ // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+ // a single GPRPair reg operand is used in the .td file to replace the two
+ // GPRs. However, when decoding them, the two GRPs cannot be automatically
+ // expressed as a GPRPair, so we have to manually merge them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ case ARM::LDREXD:
+ case ARM::STREXD:
+ case ARM::LDAEXD:
+ case ARM::STLEXD: {
+ const MCRegisterClass &MRC = MRI.getRegClass(ARM::GPRRegClassID);
+ bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD;
+ unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
+ if (MRC.contains(Reg)) {
+ MCInst NewMI;
+ MCOperand NewReg;
+ NewMI.setOpcode(Opcode);
+
+ if (isStore)
+ NewMI.addOperand(MI->getOperand(0));
+ NewReg = MCOperand::createReg(MRI.getMatchingSuperReg(
+ Reg, ARM::gsub_0, &MRI.getRegClass(ARM::GPRPairRegClassID)));
+ NewMI.addOperand(NewReg);
+
+ // Copy the rest operands into NewMI.
+ for (unsigned i = isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
+ NewMI.addOperand(MI->getOperand(i));
+ printInstruction(&NewMI, STI, O);
+ return;
+ }
+ break;
+ }
+ // B9.3.3 ERET (Thumb)
+ // For a target that has Virtualization Extensions, ERET is the preferred
+ // disassembly of SUBS PC, LR, #0
+ case ARM::t2SUBS_PC_LR: {
+ if (MI->getNumOperands() == 3 && MI->getOperand(0).isImm() &&
+ MI->getOperand(0).getImm() == 0 &&
+ STI.getFeatureBits()[ARM::FeatureVirtualization]) {
+ O << "\teret";
+ printPredicateOperand(MI, 1, STI, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ break;
+ }
+ }
+
+ printInstruction(MI, STI, O);
+ printAnnotation(O, Annot);
+}
+
+void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ printRegName(O, Reg);
+ } else if (Op.isImm()) {
+ O << markup("<imm:") << '#' << formatImm(Op.getImm()) << markup(">");
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ const MCExpr *Expr = Op.getExpr();
+ switch (Expr->getKind()) {
+ case MCExpr::Binary:
+ O << '#';
+ Expr->print(O, &MAI);
+ break;
+ case MCExpr::Constant: {
+ // If a symbolic branch target was added as a constant expression then
+ // print that address in hex. And only print 32 unsigned bits for the
+ // address.
+ const MCConstantExpr *Constant = cast<MCConstantExpr>(Expr);
+ int64_t TargetAddress;
+ if (!Constant->evaluateAsAbsolute(TargetAddress)) {
+ O << '#';
+ Expr->print(O, &MAI);
+ } else {
+ O << "0x";
+ O.write_hex(static_cast<uint32_t>(TargetAddress));
+ }
+ break;
+ }
+ default:
+ // FIXME: Should we always treat this as if it is a constant literal and
+ // prefix it with '#'?
+ Expr->print(O, &MAI);
+ break;
+ }
+ }
+}
+
+void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ if (MO1.isExpr()) {
+ MO1.getExpr()->print(O, &MAI);
+ return;
+ }
+
+ O << markup("<mem:") << "[pc, ";
+
+ int32_t OffImm = (int32_t)MO1.getImm();
+ bool isSub = OffImm < 0;
+
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ if (isSub) {
+ O << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
+ } else {
+ O << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+// so_reg is a 4-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms. This includes:
+// REG 0 0 - e.g. R5
+// REG REG 0,SH_OPC - e.g. R5, ROR R3
+// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+ const MCOperand &MO3 = MI->getOperand(OpNum + 2);
+
+ printRegName(O, MO1.getReg());
+
+ // Print the shift opc.
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc == ARM_AM::rrx)
+ return;
+
+ O << ' ';
+ printRegName(O, MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+}
+
+void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ printRegName(O, MO1.getReg());
+
+ // Print the shift opc.
+ printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
+ ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
+}
+
+//===--------------------------------------------------------------------===//
+// Addressing Mode #2
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+ const MCOperand &MO3 = MI->getOperand(Op + 2);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ if (!MO2.getReg()) {
+ if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0.
+ O << ", " << markup("<imm:") << "#"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ARM_AM::getAM2Offset(MO3.getImm()) << markup(">");
+ }
+ O << "]" << markup(">");
+ return;
+ }
+
+ O << ", ";
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()));
+ printRegName(O, MO2.getReg());
+
+ printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO3.getImm()),
+ ARM_AM::getAM2Offset(MO3.getImm()), UseMarkup);
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
+ O << ", lsl " << markup("<imm:") << "#1" << markup(">") << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, STI, O);
+ return;
+ }
+
+#ifndef NDEBUG
+ const MCOperand &MO3 = MI->getOperand(Op + 2);
+ unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
+ assert(IdxMode != ARMII::IndexModePost && "Should be pre or offset index op");
+#endif
+
+ printAM2PreOrOffsetIndexOp(MI, Op, STI, O);
+}
+
+void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ if (!MO1.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
+ O << markup("<imm:") << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs
+ << markup(">");
+ return;
+ }
+
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()));
+ printRegName(O, MO1.getReg());
+
+ printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO2.getImm()),
+ ARM_AM::getAM2Offset(MO2.getImm()), UseMarkup);
+}
+
+//===--------------------------------------------------------------------===//
+// Addressing Mode #3
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O,
+ bool AlwaysPrintImm0) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+ const MCOperand &MO3 = MI->getOperand(Op + 2);
+
+ O << markup("<mem:") << '[';
+ printRegName(O, MO1.getReg());
+
+ if (MO2.getReg()) {
+ O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()));
+ printRegName(O, MO2.getReg());
+ O << ']' << markup(">");
+ return;
+ }
+
+ // If the op is sub we have to print the immediate even if it is 0
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
+ ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
+
+ if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) {
+ O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(op) << ImmOffs
+ << markup(">");
+ }
+ O << ']' << markup(">");
+}
+
+template <bool AlwaysPrintImm0>
+void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, Op, STI, O);
+ return;
+ }
+
+ assert(ARM_AM::getAM3IdxMode(MI->getOperand(Op + 2).getImm()) !=
+ ARMII::IndexModePost &&
+ "unexpected idxmode");
+ printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0);
+}
+
+void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ if (MO1.getReg()) {
+ O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()));
+ printRegName(O, MO1.getReg());
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+ O << markup("<imm:") << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
+ << markup(">");
+}
+
+void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ unsigned Imm = MO.getImm();
+ O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
+ << markup(">");
+}
+
+void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ O << (MO2.getImm() ? "" : "-");
+ printRegName(O, MO1.getReg());
+}
+
+void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ unsigned Imm = MO.getImm();
+ O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
+ << markup(">");
+}
+
+void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ ARM_AM::AMSubMode Mode =
+ ARM_AM::getAM4SubMode(MI->getOperand(OpNum).getImm());
+ O << ARM_AM::getAMSubModeStr(Mode);
+}
+
+template <bool AlwaysPrintImm0>
+void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, STI, O);
+ return;
+ }
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
+ ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm());
+ if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
+ O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(Op)
+ << ImmOffs * 4 << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (MO2.getImm()) {
+ O << ":" << (MO2.getImm() << 3);
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.getReg() == 0)
+ O << "!";
+ else {
+ O << ", ";
+ printRegName(O, MO.getReg());
+ }
+}
+
+void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ uint32_t v = ~MO.getImm();
+ int32_t lsb = countTrailingZeros(v);
+ int32_t width = (32 - countLeadingZeros(v)) - lsb;
+ assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
+ O << markup("<imm:") << '#' << lsb << markup(">") << ", " << markup("<imm:")
+ << '#' << width << markup(">");
+}
+
+void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val, STI.getFeatureBits()[ARM::HasV8Ops]);
+}
+
+void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_ISB::InstSyncBOptToString(val);
+}
+
+void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ bool isASR = (ShiftOp & (1 << 5)) != 0;
+ unsigned Amt = ShiftOp & 0x1f;
+ if (isASR) {
+ O << ", asr " << markup("<imm:") << "#" << (Amt == 0 ? 32 : Amt)
+ << markup(">");
+ } else if (Amt) {
+ O << ", lsl " << markup("<imm:") << "#" << Amt << markup(">");
+ }
+}
+
+void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ if (Imm == 0)
+ return;
+ assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!");
+ O << ", lsl " << markup("<imm:") << "#" << Imm << markup(">");
+}
+
+void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ // A shift amount of 32 is encoded as 0.
+ if (Imm == 0)
+ Imm = 32;
+ assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!");
+ O << ", asr " << markup("<imm:") << "#" << Imm << markup(">");
+}
+
+void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << "{";
+ for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
+ if (i != OpNum)
+ O << ", ";
+ printRegName(O, MI->getOperand(i).getReg());
+ }
+ O << "}";
+}
+
+void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0));
+ O << ", ";
+ printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1));
+}
+
+void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ if (Op.getImm())
+ O << "be";
+ else
+ O << "le";
+}
+
+void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ O << ARM_PROC::IModToString(Op.getImm());
+}
+
+void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned IFlags = Op.getImm();
+ for (int i = 2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ O << ARM_PROC::IFlagsToString(1 << i);
+
+ if (IFlags == 0)
+ O << "none";
+}
+
+void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned SpecRegRBit = Op.getImm() >> 4;
+ unsigned Mask = Op.getImm() & 0xf;
+ const FeatureBitset &FeatureBits = STI.getFeatureBits();
+
+ if (FeatureBits[ARM::FeatureMClass]) {
+ unsigned SYSm = Op.getImm();
+ unsigned Opcode = MI->getOpcode();
+
+ // For writes, handle extended mask bits if the DSP extension is present.
+ if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSP]) {
+ switch (SYSm) {
+ case 0x400:
+ O << "apsr_g";
+ return;
+ case 0xc00:
+ O << "apsr_nzcvqg";
+ return;
+ case 0x401:
+ O << "iapsr_g";
+ return;
+ case 0xc01:
+ O << "iapsr_nzcvqg";
+ return;
+ case 0x402:
+ O << "eapsr_g";
+ return;
+ case 0xc02:
+ O << "eapsr_nzcvqg";
+ return;
+ case 0x403:
+ O << "xpsr_g";
+ return;
+ case 0xc03:
+ O << "xpsr_nzcvqg";
+ return;
+ }
+ }
+
+ // Handle the basic 8-bit mask.
+ SYSm &= 0xff;
+
+ if (Opcode == ARM::t2MSR_M && FeatureBits [ARM::HasV7Ops]) {
+ // ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an
+ // alias for MSR APSR_nzcvq.
+ switch (SYSm) {
+ case 0:
+ O << "apsr_nzcvq";
+ return;
+ case 1:
+ O << "iapsr_nzcvq";
+ return;
+ case 2:
+ O << "eapsr_nzcvq";
+ return;
+ case 3:
+ O << "xpsr_nzcvq";
+ return;
+ }
+ }
+
+ switch (SYSm) {
+ default:
+ llvm_unreachable("Unexpected mask value!");
+ case 0:
+ O << "apsr";
+ return;
+ case 1:
+ O << "iapsr";
+ return;
+ case 2:
+ O << "eapsr";
+ return;
+ case 3:
+ O << "xpsr";
+ return;
+ case 5:
+ O << "ipsr";
+ return;
+ case 6:
+ O << "epsr";
+ return;
+ case 7:
+ O << "iepsr";
+ return;
+ case 8:
+ O << "msp";
+ return;
+ case 9:
+ O << "psp";
+ return;
+ case 16:
+ O << "primask";
+ return;
+ case 17:
+ O << "basepri";
+ return;
+ case 18:
+ O << "basepri_max";
+ return;
+ case 19:
+ O << "faultmask";
+ return;
+ case 20:
+ O << "control";
+ return;
+ }
+ }
+
+ // As special cases, CPSR_f, CPSR_s and CPSR_fs prefer printing as
+ // APSR_nzcvq, APSR_g and APSRnzcvqg, respectively.
+ if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
+ O << "APSR_";
+ switch (Mask) {
+ default:
+ llvm_unreachable("Unexpected mask value!");
+ case 4:
+ O << "g";
+ return;
+ case 8:
+ O << "nzcvq";
+ return;
+ case 12:
+ O << "nzcvqg";
+ return;
+ }
+ }
+
+ if (SpecRegRBit)
+ O << "SPSR";
+ else
+ O << "CPSR";
+
+ if (Mask) {
+ O << '_';
+ if (Mask & 8)
+ O << 'f';
+ if (Mask & 4)
+ O << 's';
+ if (Mask & 2)
+ O << 'x';
+ if (Mask & 1)
+ O << 'c';
+ }
+}
+
+void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint32_t Banked = MI->getOperand(OpNum).getImm();
+ uint32_t R = (Banked & 0x20) >> 5;
+ uint32_t SysM = Banked & 0x1f;
+
+ // Nothing much we can do about this, the encodings are specified in B9.2.3 of
+ // the ARM ARM v7C, and are all over the shop.
+ if (R) {
+ O << "SPSR_";
+
+ switch (SysM) {
+ case 0x0e:
+ O << "fiq";
+ return;
+ case 0x10:
+ O << "irq";
+ return;
+ case 0x12:
+ O << "svc";
+ return;
+ case 0x14:
+ O << "abt";
+ return;
+ case 0x16:
+ O << "und";
+ return;
+ case 0x1c:
+ O << "mon";
+ return;
+ case 0x1e:
+ O << "hyp";
+ return;
+ default:
+ llvm_unreachable("Invalid banked SPSR register");
+ }
+ }
+
+ assert(!R && "should have dealt with SPSR regs");
+ const char *RegNames[] = {
+ "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr",
+ "", "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq",
+ "lr_fiq", "", "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt",
+ "sp_abt", "lr_und", "sp_und", "", "", "", "",
+ "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"};
+ const char *Name = RegNames[SysM];
+ assert(Name[0] && "invalid banked register operand");
+
+ O << Name;
+}
+
+void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ // Handle the undefined 15 CC value here for printing so we don't abort().
+ if ((unsigned)CC == 15)
+ O << "<und>";
+ else if (CC != ARMCC::AL)
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNum).getReg()) {
+ assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
+ "Expect ARM CPSR register!");
+ O << 's';
+ }
+}
+
+void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << "p" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << "c" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << "{" << MI->getOperand(OpNum).getImm() << "}";
+}
+
+void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
+}
+
+template <unsigned scale>
+void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ if (MO.isExpr()) {
+ MO.getExpr()->print(O, &MAI);
+ return;
+ }
+
+ int32_t OffImm = (int32_t)MO.getImm() << scale;
+
+ O << markup("<imm:");
+ if (OffImm == INT32_MIN)
+ O << "#-0";
+ else if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else
+ O << "#" << OffImm;
+ O << markup(">");
+}
+
+void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << markup("<imm:") << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
+ << markup(">");
+}
+
+void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ O << markup("<imm:") << "#" << formatImm((Imm == 0 ? 32 : Imm))
+ << markup(">");
+}
+
+void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned Mask = MI->getOperand(OpNum).getImm();
+ unsigned Firstcond = MI->getOperand(OpNum - 1).getImm();
+ unsigned CondBit0 = Firstcond & 1;
+ unsigned NumTZ = countTrailingZeros(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ O << 't';
+ else
+ O << 'e';
+ }
+}
+
+void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, STI, O);
+ return;
+ }
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (unsigned RegNum = MO2.getReg()) {
+ O << ", ";
+ printRegName(O, RegNum);
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
+ unsigned Op,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O,
+ unsigned Scale) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, STI, O);
+ return;
+ }
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (unsigned ImmOffs = MO2.getImm()) {
+ O << ", " << markup("<imm:") << "#" << formatImm(ImmOffs * Scale)
+ << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
+ unsigned Op,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 1);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
+ unsigned Op,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 2);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
+ unsigned Op,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4);
+}
+
+void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4);
+}
+
+// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
+// register with shift forms.
+// REG 0 0 - e.g. R5
+// REG IMM, SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ unsigned Reg = MO1.getReg();
+ printRegName(O, Reg);
+
+ // Print the shift opc.
+ assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+ printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
+ ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
+}
+
+template <bool AlwaysPrintImm0>
+void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, STI, O);
+ return;
+ }
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ bool isSub = OffImm < 0;
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ if (isSub) {
+ O << ", " << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
+ } else if (AlwaysPrintImm0 || OffImm > 0) {
+ O << ", " << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+template <bool AlwaysPrintImm0>
+void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ bool isSub = OffImm < 0;
+ // Don't print +0.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ if (isSub) {
+ O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
+ } else if (AlwaysPrintImm0 || OffImm > 0) {
+ O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+template <bool AlwaysPrintImm0>
+void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, OpNum, STI, O);
+ return;
+ }
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ bool isSub = OffImm < 0;
+
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
+ // Don't print +0.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ if (isSub) {
+ O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
+ } else if (AlwaysPrintImm0 || OffImm > 0) {
+ O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (MO2.getImm()) {
+ O << ", " << markup("<imm:") << "#" << formatImm(MO2.getImm() * 4)
+ << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm();
+ O << ", " << markup("<imm:");
+ if (OffImm == INT32_MIN)
+ O << "#-0";
+ else if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else
+ O << "#" << OffImm;
+ O << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm();
+
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
+ O << ", " << markup("<imm:");
+ if (OffImm == INT32_MIN)
+ O << "#-0";
+ else if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else
+ O << "#" << OffImm;
+ O << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+ const MCOperand &MO3 = MI->getOperand(OpNum + 2);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ assert(MO2.getReg() && "Invalid so_reg load / store address!");
+ O << ", ";
+ printRegName(O, MO2.getReg());
+
+ unsigned ShAmt = MO3.getImm();
+ if (ShAmt) {
+ assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
+ O << ", lsl " << markup("<imm:") << "#" << ShAmt << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << markup("<imm:") << '#' << ARM_AM::getFPImmFloat(MO.getImm())
+ << markup(">");
+}
+
+void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+ unsigned EltBits;
+ uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ O << markup("<imm:") << "#0x";
+ O.write_hex(Val);
+ O << markup(">");
+}
+
+void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ O << markup("<imm:") << "#" << formatImm(Imm + 1) << markup(">");
+}
+
+void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ if (Imm == 0)
+ return;
+ assert(Imm <= 3 && "illegal ror immediate!");
+ O << ", ror " << markup("<imm:") << "#" << 8 * Imm << markup(">");
+}
+
+void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ MCOperand Op = MI->getOperand(OpNum);
+
+ // Support for fixups (MCFixup)
+ if (Op.isExpr())
+ return printOperand(MI, OpNum, STI, O);
+
+ unsigned Bits = Op.getImm() & 0xFF;
+ unsigned Rot = (Op.getImm() & 0xF00) >> 7;
+
+ bool PrintUnsigned = false;
+ switch (MI->getOpcode()) {
+ case ARM::MOVi:
+ // Movs to PC should be treated unsigned
+ PrintUnsigned = (MI->getOperand(OpNum - 1).getReg() == ARM::PC);
+ break;
+ case ARM::MSRi:
+ // Movs to special registers should be treated unsigned
+ PrintUnsigned = true;
+ break;
+ }
+
+ int32_t Rotated = ARM_AM::rotr32(Bits, Rot);
+ if (ARM_AM::getSOImmVal(Rotated) == Op.getImm()) {
+ // #rot has the least possible value
+ O << "#" << markup("<imm:");
+ if (PrintUnsigned)
+ O << static_cast<uint32_t>(Rotated);
+ else
+ O << Rotated;
+ O << markup(">");
+ return;
+ }
+
+ // Explicit #bits, #rot implied
+ O << "#" << markup("<imm:") << Bits << markup(">") << ", #" << markup("<imm:")
+ << Rot << markup(">");
+}
+
+void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ O << markup("<imm:") << "#" << 16 - MI->getOperand(OpNum).getImm()
+ << markup(">");
+}
+
+void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ O << markup("<imm:") << "#" << 32 - MI->getOperand(OpNum).getImm()
+ << markup(">");
+}
+
+void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << "[" << MI->getOperand(OpNum).getImm() << "]";
+}
+
+void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "}";
+}
+
+void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+ O << "{";
+ printRegName(O, Reg0);
+ O << ", ";
+ printRegName(O, Reg1);
+ O << "}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+ O << "{";
+ printRegName(O, Reg0);
+ O << ", ";
+ printRegName(O, Reg1);
+ O << "}";
+}
+
+void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "}";
+}
+
+void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 3);
+ O << "}";
+}
+
+void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+ O << "{";
+ printRegName(O, Reg0);
+ O << "[], ";
+ printRegName(O, Reg1);
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 3);
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpacedAllLanes(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+ O << "{";
+ printRegName(O, Reg0);
+ O << "[], ";
+ printRegName(O, Reg1);
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeSpacedAllLanes(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListFourSpacedAllLanes(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 6);
+ O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << "}";
+}
+
+void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 6);
+ O << "}";
+}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
new file mode 100644
index 0000000..52f7115
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -0,0 +1,229 @@
+//===- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
+#define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class ARMInstPrinter : public MCInstPrinter {
+public:
+ ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI);
+
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+
+ void printSORegRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSORegImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printAddrModeTBB(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrModeTBH(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode2Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
+ void printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
+ bool AlwaysPrintImm0);
+ void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
+ void printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printInstSyncBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ template <unsigned scale>
+ void printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbSRImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbITMask(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O, unsigned Scale);
+ void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
+ void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
+ void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
+ void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printSetendOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCPSIMod(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCPSIFlag(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBankedRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printRegisterList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printRotImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printPCLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFBits16(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFBits32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListOne(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListTwo(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListThree(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListFour(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/LICENSE.TXT b/contrib/llvm/lib/Target/ARM/LICENSE.TXT
new file mode 100755
index 0000000..68afea1
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/LICENSE.TXT
@@ -0,0 +1,47 @@
+ARM Limited
+
+Software Grant License Agreement ("Agreement")
+
+Except for the license granted herein to you, ARM Limited ("ARM") reserves all
+right, title, and interest in and to the Software (defined below).
+
+Definition
+
+"Software" means the code and documentation as well as any original work of
+authorship, including any modifications or additions to an existing work, that
+is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for
+inclusion in, or documentation of, any of the products owned or managed by LLVM
+(the "Work"). For the purposes of this definition, "submitted" means any form of
+electronic, verbal, or written communication sent to LLVM or its
+representatives, including but not limited to communication on electronic
+mailing lists, source code control systems, and issue tracking systems that are
+managed by, or on behalf of, LLVM for the purpose of discussing and improving
+the Work, but excluding communication that is conspicuously marked otherwise.
+
+1. Grant of Copyright License. Subject to the terms and conditions of this
+ Agreement, ARM hereby grants to you and to recipients of the Software
+ distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+ royalty-free, irrevocable copyright license to reproduce, prepare derivative
+ works of, publicly display, publicly perform, sublicense, and distribute the
+ Software and such derivative works.
+
+2. Grant of Patent License. Subject to the terms and conditions of this
+ Agreement, ARM hereby grants you and to recipients of the Software
+ distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+ royalty-free, irrevocable (except as stated in this section) patent license
+ to make, have made, use, offer to sell, sell, import, and otherwise transfer
+ the Work, where such license applies only to those patent claims licensable
+ by ARM that are necessarily infringed by ARM's Software alone or by
+ combination of the Software with the Work to which such Software was
+ submitted. If any entity institutes patent litigation against ARM or any
+ other entity (including a cross-claim or counterclaim in a lawsuit) alleging
+ that ARM's Software, or the Work to which ARM has contributed constitutes
+ direct or contributory patent infringement, then any patent licenses granted
+ to that entity under this Agreement for the Software or Work shall terminate
+ as of the date such litigation is filed.
+
+Unless required by applicable law or agreed to in writing, the software is
+provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+either express or implied, including, without limitation, any warranties or
+conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE.
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
new file mode 100644
index 0000000..b03cada
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -0,0 +1,713 @@
+//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM addressing mode implementation stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+
+namespace llvm {
+
+/// ARM_AM - ARM Addressing Mode Stuff
+namespace ARM_AM {
+ enum ShiftOpc {
+ no_shift = 0,
+ asr,
+ lsl,
+ lsr,
+ ror,
+ rrx
+ };
+
+ enum AddrOpc {
+ sub = 0,
+ add
+ };
+
+ static inline const char *getAddrOpcStr(AddrOpc Op) {
+ return Op == sub ? "-" : "";
+ }
+
+ static inline const char *getShiftOpcStr(ShiftOpc Op) {
+ switch (Op) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::asr: return "asr";
+ case ARM_AM::lsl: return "lsl";
+ case ARM_AM::lsr: return "lsr";
+ case ARM_AM::ror: return "ror";
+ case ARM_AM::rrx: return "rrx";
+ }
+ }
+
+ static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
+ switch (Op) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror: return 3;
+ }
+ }
+
+ enum AMSubMode {
+ bad_am_submode = 0,
+ ia,
+ ib,
+ da,
+ db
+ };
+
+ static inline const char *getAMSubModeStr(AMSubMode Mode) {
+ switch (Mode) {
+ default: llvm_unreachable("Unknown addressing sub-mode!");
+ case ARM_AM::ia: return "ia";
+ case ARM_AM::ib: return "ib";
+ case ARM_AM::da: return "da";
+ case ARM_AM::db: return "db";
+ }
+ }
+
+ /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+ ///
+ static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val >> Amt) | (Val << ((32-Amt)&31));
+ }
+
+ /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+ ///
+ static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val << Amt) | (Val >> ((32-Amt)&31));
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #1: shift_operand with registers
+ //===--------------------------------------------------------------------===//
+ //
+ // This 'addressing mode' is used for arithmetic instructions. It can
+ // represent things like:
+ // reg
+ // reg [asr|lsl|lsr|ror|rrx] reg
+ // reg [asr|lsl|lsr|ror|rrx] imm
+ //
+ // This is stored three operands [rega, regb, opc]. The first is the base
+ // reg, the second is the shift amount (or reg0 if not present or imm). The
+ // third operand encodes the shift opcode and the imm if a reg isn't present.
+ //
+ static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+ return ShOp | (Imm << 3);
+ }
+ static inline unsigned getSORegOffset(unsigned Op) {
+ return Op >> 3;
+ }
+ static inline ShiftOpc getSORegShOp(unsigned Op) {
+ return (ShiftOpc)(Op & 7);
+ }
+
+ /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+ /// the 8-bit imm value.
+ static inline unsigned getSOImmValImm(unsigned Imm) {
+ return Imm & 0xFF;
+ }
+ /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+ /// the rotate amount.
+ static inline unsigned getSOImmValRot(unsigned Imm) {
+ return (Imm >> 8) * 2;
+ }
+
+ /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+ /// computing the rotate amount to use. If this immediate value cannot be
+ /// handled with a single shifter-op, determine a good rotate amount that will
+ /// take a maximal chunk of bits out of the immediate.
+ static inline unsigned getSOImmValRotate(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the rotate amount.
+ unsigned TZ = countTrailingZeros(Imm);
+
+ // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+ // not 9.
+ unsigned RotAmt = TZ & ~1;
+
+ // If we can handle this spread, return it.
+ if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+ return (32-RotAmt)&31; // HW rotates right, not left.
+
+ // For values like 0xF000000F, we should ignore the low 6 bits, then
+ // retry the hunt.
+ if (Imm & 63U) {
+ unsigned TZ2 = countTrailingZeros(Imm & ~63U);
+ unsigned RotAmt2 = TZ2 & ~1;
+ if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
+ return (32-RotAmt2)&31; // HW rotates right, not left.
+ }
+
+ // Otherwise, we have no way to cover this span of bits with a single
+ // shifter_op immediate. Return a chunk of bits that will be useful to
+ // handle.
+ return (32-RotAmt)&31; // HW rotates right, not left.
+ }
+
+ /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into an shifter_operand immediate operand, return the 12-bit encoding for
+ /// it. If not, return -1.
+ static inline int getSOImmVal(unsigned Arg) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Arg & ~255U) == 0) return Arg;
+
+ unsigned RotAmt = getSOImmValRotate(Arg);
+
+ // If this cannot be handled with a single shifter_op, bail out.
+ if (rotr32(~255U, RotAmt) & Arg)
+ return -1;
+
+ // Encode this correctly.
+ return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+ }
+
+ /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+ /// or'ing together two SOImmVal's.
+ static inline bool isSOImmTwoPartVal(unsigned V) {
+ // If this can be handled with a single shifter_op, bail out.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled with two shifter_op's, accept.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ return V == 0;
+ }
+
+ /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the first chunk of it.
+ static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+ return rotr32(255U, getSOImmValRotate(V)) & V;
+ }
+
+ /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the second chunk of it.
+ static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+ // Mask out the first hunk.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+ // Take what's left.
+ assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+ return V;
+ }
+
+ /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImmValShift(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially immediate operand with a shift
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return countTrailingZeros(Imm);
+ }
+
+ /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+ /// by left shifting a 8-bit immediate.
+ static inline bool isThumbImmShiftedVal(unsigned V) {
+ // If this can be handled with
+ V = (~255U << getThumbImmValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImm16ValShift(unsigned Imm) {
+ // 16-bit (or less) immediates are trivially immediate operand with a shift
+ // of zero.
+ if ((Imm & ~65535U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return countTrailingZeros(Imm);
+ }
+
+ /// isThumbImm16ShiftedVal - Return true if the specified value can be
+ /// obtained by left shifting a 16-bit immediate.
+ static inline bool isThumbImm16ShiftedVal(unsigned V) {
+ // If this can be handled with
+ V = (~65535U << getThumbImm16ValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImmNonShiftedVal - If V is a value that satisfies
+ /// isThumbImmShiftedVal, return the non-shiftd value.
+ static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+ return V >> getThumbImmValShift(V);
+ }
+
+
+ /// getT2SOImmValSplat - Return the 12-bit encoded representation
+ /// if the specified value can be obtained by splatting the low 8 bits
+ /// into every other byte or every byte of a 32-bit value. i.e.,
+ /// 00000000 00000000 00000000 abcdefgh control = 0
+ /// 00000000 abcdefgh 00000000 abcdefgh control = 1
+ /// abcdefgh 00000000 abcdefgh 00000000 control = 2
+ /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
+ /// Return -1 if none of the above apply.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmValSplatVal(unsigned V) {
+ unsigned u, Vs, Imm;
+ // control = 0
+ if ((V & 0xffffff00) == 0)
+ return V;
+
+ // If the value is zeroes in the first byte, just shift those off
+ Vs = ((V & 0xff) == 0) ? V >> 8 : V;
+ // Any passing value only has 8 bits of payload, splatted across the word
+ Imm = Vs & 0xff;
+ // Likewise, any passing values have the payload splatted into the 3rd byte
+ u = Imm | (Imm << 16);
+
+ // control = 1 or 2
+ if (Vs == u)
+ return (((Vs == V) ? 1 : 2) << 8) | Imm;
+
+ // control = 3
+ if (Vs == (u | (u << 8)))
+ return (3 << 8) | Imm;
+
+ return -1;
+ }
+
+ /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
+ /// specified value is a rotated 8-bit value. Return -1 if no rotation
+ /// encoding is possible.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmValRotateVal(unsigned V) {
+ unsigned RotAmt = countLeadingZeros(V);
+ if (RotAmt >= 24)
+ return -1;
+
+ // If 'Arg' can be handled with a single shifter_op return the value.
+ if ((rotr32(0xff000000U, RotAmt) & V) == V)
+ return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
+
+ return -1;
+ }
+
+ /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
+ /// encoding for it. If not, return -1.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmVal(unsigned Arg) {
+ // If 'Arg' is an 8-bit splat, then get the encoded value.
+ int Splat = getT2SOImmValSplatVal(Arg);
+ if (Splat != -1)
+ return Splat;
+
+ // If 'Arg' can be handled with a single shifter_op return the value.
+ int Rot = getT2SOImmValRotateVal(Arg);
+ if (Rot != -1)
+ return Rot;
+
+ return -1;
+ }
+
+ static inline unsigned getT2SOImmValRotate(unsigned V) {
+ if ((V & ~255U) == 0) return 0;
+ // Use CTZ to compute the rotate amount.
+ unsigned RotAmt = countTrailingZeros(V);
+ return (32 - RotAmt) & 31;
+ }
+
+ static inline bool isT2SOImmTwoPartVal (unsigned Imm) {
+ unsigned V = Imm;
+ // Passing values can be any combination of splat values and shifter
+ // values. If this can be handled with a single shifter or splat, bail
+ // out. Those should be handled directly, not with a two-part val.
+ if (getT2SOImmValSplatVal(V) != -1)
+ return false;
+ V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled as an immediate, accept.
+ if (getT2SOImmVal(V) != -1) return true;
+
+ // Likewise, try masking out a splat value first.
+ V = Imm;
+ if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
+ V &= ~0xff00ff00U;
+ else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
+ V &= ~0x00ff00ffU;
+ // If what's left can be handled as an immediate, accept.
+ if (getT2SOImmVal(V) != -1) return true;
+
+ // Otherwise, do not accept.
+ return false;
+ }
+
+ static inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
+ assert (isT2SOImmTwoPartVal(Imm) &&
+ "Immedate cannot be encoded as two part immediate!");
+ // Try a shifter operand as one part
+ unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
+ // If the rest is encodable as an immediate, then return it.
+ if (getT2SOImmVal(V) != -1) return V;
+
+ // Try masking out a splat value first.
+ if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
+ return Imm & 0xff00ff00U;
+
+ // The other splat is all that's left as an option.
+ assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
+ return Imm & 0x00ff00ffU;
+ }
+
+ static inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
+ // Mask out the first hunk
+ Imm ^= getT2SOImmTwoPartFirst(Imm);
+ // Return what's left
+ assert (getT2SOImmVal(Imm) != -1 &&
+ "Unable to encode second part of T2 two part SO immediate");
+ return Imm;
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #2
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for most simple load/store instructions.
+ //
+ // addrmode2 := reg +/- reg shop imm
+ // addrmode2 := reg +/- imm12
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
+ // fourth operand 16-17 encodes the index mode.
+ //
+ // If this addressing mode is a frame index (before prolog/epilog insertion
+ // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+ // with no shift amount for the frame offset.
+ //
+ static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
+ unsigned IdxMode = 0) {
+ assert(Imm12 < (1 << 12) && "Imm too large!");
+ bool isSub = Opc == sub;
+ return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
+ }
+ static inline unsigned getAM2Offset(unsigned AM2Opc) {
+ return AM2Opc & ((1 << 12)-1);
+ }
+ static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+ return ((AM2Opc >> 12) & 1) ? sub : add;
+ }
+ static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+ return (ShiftOpc)((AM2Opc >> 13) & 7);
+ }
+ static inline unsigned getAM2IdxMode(unsigned AM2Opc) {
+ return (AM2Opc >> 16);
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #3
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for sign-extending loads, and load/store-pair instructions.
+ //
+ // addrmode3 := reg +/- reg
+ // addrmode3 := reg +/- imm8
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the
+ // index mode.
+
+ /// getAM3Opc - This function encodes the addrmode3 opc field.
+ static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
+ unsigned IdxMode = 0) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset | (IdxMode << 9);
+ }
+ static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+ return AM3Opc & 0xFF;
+ }
+ static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+ return ((AM3Opc >> 8) & 1) ? sub : add;
+ }
+ static inline unsigned getAM3IdxMode(unsigned AM3Opc) {
+ return (AM3Opc >> 9);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #4
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for load / store multiple instructions.
+ //
+ // addrmode4 := reg, <mode>
+ //
+ // The four modes are:
+ // IA - Increment after
+ // IB - Increment before
+ // DA - Decrement after
+ // DB - Decrement before
+ // For VFP instructions, only the IA and DB modes are valid.
+
+ static inline AMSubMode getAM4SubMode(unsigned Mode) {
+ return (AMSubMode)(Mode & 0x7);
+ }
+
+ static inline unsigned getAM4ModeImm(AMSubMode SubMode) {
+ return (int)SubMode;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #5
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for coprocessor instructions, such as FP load/stores.
+ //
+ // addrmode5 := reg +/- imm8*4
+ //
+ // The first operand is always a Reg. The second operand encodes the
+ // operation in bit 8 and the immediate in bits 0-7.
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field.
+ static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+ return AM5Opc & 0xFF;
+ }
+ static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1) ? sub : add;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #6
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for NEON load / store instructions.
+ //
+ // addrmode6 := reg with optional alignment
+ //
+ // This is stored in two operands [regaddr, align]. The first is the
+ // address register. The second operand is the value of the alignment
+ // specifier in bytes or zero if no explicit alignment.
+ // Valid alignments depend on the specific instruction.
+
+ //===--------------------------------------------------------------------===//
+ // NEON Modified Immediates
+ //===--------------------------------------------------------------------===//
+ //
+ // Several NEON instructions (e.g., VMOV) take a "modified immediate"
+ // vector operand, where a small immediate encoded in the instruction
+ // specifies a full NEON vector value. These modified immediates are
+ // represented here as encoded integers. The low 8 bits hold the immediate
+ // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
+ // the "Cmode" field of the instruction. The interfaces below treat the
+ // Op and Cmode values as a single 5-bit value.
+
+ static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
+ return (OpCmode << 8) | Val;
+ }
+ static inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
+ return (ModImm >> 8) & 0x1f;
+ }
+ static inline unsigned getNEONModImmVal(unsigned ModImm) {
+ return ModImm & 0xff;
+ }
+
+ /// decodeNEONModImm - Decode a NEON modified immediate value into the
+ /// element value and the element size in bits. (If the element size is
+ /// smaller than the vector, it is splatted into all the elements.)
+ static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
+ unsigned OpCmode = getNEONModImmOpCmode(ModImm);
+ unsigned Imm8 = getNEONModImmVal(ModImm);
+ uint64_t Val = 0;
+
+ if (OpCmode == 0xe) {
+ // 8-bit vector elements
+ Val = Imm8;
+ EltBits = 8;
+ } else if ((OpCmode & 0xc) == 0x8) {
+ // 16-bit vector elements
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 16;
+ } else if ((OpCmode & 0x8) == 0) {
+ // 32-bit vector elements, zero with one byte set
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 32;
+ } else if ((OpCmode & 0xe) == 0xc) {
+ // 32-bit vector elements, one byte with low bits set
+ unsigned ByteNum = 1 + (OpCmode & 0x1);
+ Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
+ EltBits = 32;
+ } else if (OpCmode == 0x1e) {
+ // 64-bit vector elements
+ for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if ((ModImm >> ByteNum) & 1)
+ Val |= (uint64_t)0xff << (8 * ByteNum);
+ }
+ EltBits = 64;
+ } else {
+ llvm_unreachable("Unsupported NEON immediate");
+ }
+ return Val;
+ }
+
+ // Generic validation for single-byte immediate (0X00, 00X0, etc).
+ static inline bool isNEONBytesplat(unsigned Value, unsigned Size) {
+ assert(Size >= 1 && Size <= 4 && "Invalid size");
+ unsigned count = 0;
+ for (unsigned i = 0; i < Size; ++i) {
+ if (Value & 0xff) count++;
+ Value >>= 8;
+ }
+ return count == 1;
+ }
+
+ /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
+ static inline bool isNEONi16splat(unsigned Value) {
+ if (Value > 0xffff)
+ return false;
+ // i16 value with set bits only in one byte X0 or 0X.
+ return Value == 0 || isNEONBytesplat(Value, 2);
+ }
+
+ // Encode NEON 16 bits Splat immediate for instructions like VBIC/VORR
+ static inline unsigned encodeNEONi16splat(unsigned Value) {
+ assert(isNEONi16splat(Value) && "Invalid NEON splat value");
+ if (Value >= 0x100)
+ Value = (Value >> 8) | 0xa00;
+ else
+ Value |= 0x800;
+ return Value;
+ }
+
+ /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
+ static inline bool isNEONi32splat(unsigned Value) {
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
+ return Value == 0 || isNEONBytesplat(Value, 4);
+ }
+
+ /// Encode NEON 32 bits Splat immediate for instructions like VBIC/VORR.
+ static inline unsigned encodeNEONi32splat(unsigned Value) {
+ assert(isNEONi32splat(Value) && "Invalid NEON splat value");
+ if (Value >= 0x100 && Value <= 0xff00)
+ Value = (Value >> 8) | 0x200;
+ else if (Value > 0xffff && Value <= 0xff0000)
+ Value = (Value >> 16) | 0x400;
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ return Value;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Floating-point Immediates
+ //
+ static inline float getFPImmFloat(unsigned Imm) {
+ // We expect an 8-bit binary encoding of a floating-point number here.
+ union {
+ uint32_t I;
+ float F;
+ } FPUnion;
+
+ uint8_t Sign = (Imm >> 7) & 0x1;
+ uint8_t Exp = (Imm >> 4) & 0x7;
+ uint8_t Mantissa = Imm & 0xf;
+
+ // 8-bit FP iEEEE Float Encoding
+ // abcd efgh aBbbbbbc defgh000 00000000 00000000
+ //
+ // where B = NOT(b);
+
+ FPUnion.I = 0;
+ FPUnion.I |= Sign << 31;
+ FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
+ FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
+ FPUnion.I |= (Exp & 0x3) << 23;
+ FPUnion.I |= Mantissa << 19;
+ return FPUnion.F;
+ }
+
+ /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
+ /// floating-point value. If the value cannot be represented as an 8-bit
+ /// floating-point value, then return -1.
+ static inline int getFP32Imm(const APInt &Imm) {
+ uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
+ int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
+ int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0x7ffff)
+ return -1;
+ Mantissa >>= 19;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+ }
+
+ static inline int getFP32Imm(const APFloat &FPImm) {
+ return getFP32Imm(FPImm.bitcastToAPInt());
+ }
+
+ /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
+ /// floating-point value. If the value cannot be represented as an 8-bit
+ /// floating-point value, then return -1.
+ static inline int getFP64Imm(const APInt &Imm) {
+ uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
+ int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
+ uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0xffffffffffffULL)
+ return -1;
+ Mantissa >>= 48;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+ }
+
+ static inline int getFP64Imm(const APFloat &FPImm) {
+ return getFP64Imm(FPImm.bitcastToAPInt());
+ }
+
+} // end namespace ARM_AM
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
new file mode 100644
index 0000000..fa52c93
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -0,0 +1,1089 @@
+//===-- ARMAsmBackend.cpp - ARM Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMAsmBackend.h"
+#include "MCTargetDesc/ARMAsmBackendDarwin.h"
+#include "MCTargetDesc/ARMAsmBackendELF.h"
+#include "MCTargetDesc/ARMAsmBackendWinCOFF.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MachO.h"
+#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ ARMELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
+};
+
+const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = {
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // ARMFixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_ldst_pcrel_12", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_pcrel_10", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_thumb_adr_pcrel_10", 0, 8,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_adr_pcrel_12", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_cp", 0, 8,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel},
+ // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16
+ // - 19.
+ {"fixup_arm_movt_hi16", 0, 20, 0},
+ {"fixup_arm_movw_lo16", 0, 20, 0},
+ {"fixup_t2_movt_hi16", 0, 20, 0},
+ {"fixup_t2_movw_lo16", 0, 20, 0},
+ };
+ const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = {
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // ARMFixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_ldst_pcrel_12", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_pcrel_10", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_thumb_adr_pcrel_10", 8, 8,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_adr_pcrel_12", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_condbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_uncondbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_uncondbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_cp", 8, 8,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_thumb_bcc", 8, 8, MCFixupKindInfo::FKF_IsPCRel},
+ // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16
+ // - 19.
+ {"fixup_arm_movt_hi16", 12, 20, 0},
+ {"fixup_arm_movw_lo16", 12, 20, 0},
+ {"fixup_t2_movt_hi16", 12, 20, 0},
+ {"fixup_t2_movw_lo16", 12, 20, 0},
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return (IsLittleEndian ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind];
+}
+
+void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default:
+ break;
+ case MCAF_Code16:
+ setIsThumb(true);
+ break;
+ case MCAF_Code32:
+ setIsThumb(false);
+ break;
+ }
+}
+} // end anonymous namespace
+
+unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const {
+ bool HasThumb2 = STI->getFeatureBits()[ARM::FeatureThumb2];
+
+ switch (Op) {
+ default:
+ return Op;
+ case ARM::tBcc:
+ return HasThumb2 ? (unsigned)ARM::t2Bcc : Op;
+ case ARM::tLDRpci:
+ return HasThumb2 ? (unsigned)ARM::t2LDRpci : Op;
+ case ARM::tADR:
+ return HasThumb2 ? (unsigned)ARM::t2ADR : Op;
+ case ARM::tB:
+ return HasThumb2 ? (unsigned)ARM::t2B : Op;
+ case ARM::tCBZ:
+ return ARM::tHINT;
+ case ARM::tCBNZ:
+ return ARM::tHINT;
+ }
+}
+
+bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
+ if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
+ return false;
+}
+
+const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
+ uint64_t Value) const {
+ switch ((unsigned)Fixup.getKind()) {
+ case ARM::fixup_arm_thumb_br: {
+ // Relaxing tB to t2B. tB has a signed 12-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the value is too big for a (signed) i8.
+ int64_t Offset = int64_t(Value) - 4;
+ if (Offset > 2046 || Offset < -2048)
+ return "out of range pc-relative fixup value";
+ break;
+ }
+ case ARM::fixup_arm_thumb_bcc: {
+ // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the value is too big for a (signed) i8.
+ int64_t Offset = int64_t(Value) - 4;
+ if (Offset > 254 || Offset < -256)
+ return "out of range pc-relative fixup value";
+ break;
+ }
+ case ARM::fixup_thumb_adr_pcrel_10:
+ case ARM::fixup_arm_thumb_cp: {
+ // If the immediate is negative, greater than 1020, or not a multiple
+ // of four, the wide version of the instruction must be used.
+ int64_t Offset = int64_t(Value) - 4;
+ if (Offset & 3)
+ return "misaligned pc-relative fixup value";
+ else if (Offset > 1020 || Offset < 0)
+ return "out of range pc-relative fixup value";
+ break;
+ }
+ case ARM::fixup_arm_thumb_cb: {
+ // If we have a Thumb CBZ or CBNZ instruction and its target is the next
+ // instruction it is is actually out of range for the instruction.
+ // It will be changed to a NOP.
+ int64_t Offset = (Value & ~1);
+ if (Offset == 2)
+ return "will be converted to nop";
+ break;
+ }
+ default:
+ llvm_unreachable("Unexpected fixup kind in reasonForFixupRelaxation()!");
+ }
+ return nullptr;
+}
+
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ return reasonForFixupRelaxation(Fixup, Value);
+}
+
+void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ // Sanity check w/ diagnostic if we get here w/ a bogus instruction.
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ // If we are changing Thumb CBZ or CBNZ instruction to a NOP, aka tHINT, we
+ // have to change the operands too.
+ if ((Inst.getOpcode() == ARM::tCBZ || Inst.getOpcode() == ARM::tCBNZ) &&
+ RelaxedOp == ARM::tHINT) {
+ Res.setOpcode(RelaxedOp);
+ Res.addOperand(MCOperand::createImm(0));
+ Res.addOperand(MCOperand::createImm(14));
+ Res.addOperand(MCOperand::createReg(0));
+ return;
+ }
+
+ // The rest of instructions we're relaxing have the same operands.
+ // We just need to update to the proper opcode.
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
+}
+
+bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
+ const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
+ const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
+ const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
+ if (isThumb()) {
+ const uint16_t nopEncoding =
+ hasNOP() ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding;
+ uint64_t NumNops = Count / 2;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->write16(nopEncoding);
+ if (Count & 1)
+ OW->write8(0);
+ return true;
+ }
+ // ARM mode
+ const uint32_t nopEncoding =
+ hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding;
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->write32(nopEncoding);
+ // FIXME: should this function return false when unable to write exactly
+ // 'Count' bytes with NOP encodings?
+ switch (Count % 4) {
+ default:
+ break; // No leftover bytes to write
+ case 1:
+ OW->write8(0);
+ break;
+ case 2:
+ OW->write16(0);
+ break;
+ case 3:
+ OW->write16(0);
+ OW->write8(0xa0);
+ break;
+ }
+
+ return true;
+}
+
+static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) {
+ if (IsLittleEndian) {
+ // Note that the halfwords are stored high first and low second in thumb;
+ // so we need to swap the fixup value here to map properly.
+ uint32_t Swapped = (Value & 0xFFFF0000) >> 16;
+ Swapped |= (Value & 0x0000FFFF) << 16;
+ return Swapped;
+ } else
+ return Value;
+}
+
+static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
+ bool IsLittleEndian) {
+ uint32_t Value;
+
+ if (IsLittleEndian) {
+ Value = (SecondHalf & 0xFFFF) << 16;
+ Value |= (FirstHalf & 0xFFFF);
+ } else {
+ Value = (SecondHalf & 0xFFFF);
+ Value |= (FirstHalf & 0xFFFF) << 16;
+ }
+
+ return Value;
+}
+
+unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+ bool IsPCRel, MCContext *Ctx,
+ bool IsLittleEndian,
+ bool IsResolved) const {
+ unsigned Kind = Fixup.getKind();
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ return Value;
+ case FK_SecRel_2:
+ return Value;
+ case FK_SecRel_4:
+ return Value;
+ case ARM::fixup_arm_movt_hi16:
+ if (!IsPCRel)
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_arm_movw_lo16: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned Lo12 = Value & 0x0FFF;
+ // inst{19-16} = Hi4;
+ // inst{11-0} = Lo12;
+ Value = (Hi4 << 16) | (Lo12);
+ return Value;
+ }
+ case ARM::fixup_t2_movt_hi16:
+ if (!IsPCRel)
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned i = (Value & 0x800) >> 11;
+ unsigned Mid3 = (Value & 0x700) >> 8;
+ unsigned Lo8 = Value & 0x0FF;
+ // inst{19-16} = Hi4;
+ // inst{26} = i;
+ // inst{14-12} = Mid3;
+ // inst{7-0} = Lo8;
+ Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
+ return swapHalfWords(Value, IsLittleEndian);
+ }
+ case ARM::fixup_arm_ldst_pcrel_12:
+ // ARM PC-relative values are offset by 8.
+ Value -= 4;
+ // FALLTHROUGH
+ case ARM::fixup_t2_ldst_pcrel_12: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value -= 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ if (Ctx && Value >= 4096) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10,
+ // but with 16-bit halfwords swapped.
+ if (Kind == ARM::fixup_t2_ldst_pcrel_12)
+ return swapHalfWords(Value, IsLittleEndian);
+
+ return Value;
+ }
+ case ARM::fixup_arm_adr_pcrel_12: {
+ // ARM PC-relative values are offset by 8.
+ Value -= 8;
+ unsigned opc = 4; // bits {24-21}. Default to add: 0b0100
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 2; // 0b0010
+ }
+ if (Ctx && ARM_AM::getSOImmVal(Value) == -1) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
+ // Encode the immediate and shift the opcode into place.
+ return ARM_AM::getSOImmVal(Value) | (opc << 21);
+ }
+
+ case ARM::fixup_t2_adr_pcrel_12: {
+ Value -= 4;
+ unsigned opc = 0;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 5;
+ }
+
+ uint32_t out = (opc << 21);
+ out |= (Value & 0x800) << 15;
+ out |= (Value & 0x700) << 4;
+ out |= (Value & 0x0FF);
+
+ return swapHalfWords(out, IsLittleEndian);
+ }
+
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
+ // These values don't encode the low two bits since they're always zero.
+ // Offset by 8 just as above.
+ if (const MCSymbolRefExpr *SRE =
+ dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL)
+ return 0;
+ return 0xffffff & ((Value - 8) >> 2);
+ case ARM::fixup_t2_uncondbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint32_t out = 0;
+ bool I = Value & 0x800000;
+ bool J1 = Value & 0x400000;
+ bool J2 = Value & 0x200000;
+ J1 ^= I;
+ J2 ^= I;
+
+ out |= I << 26; // S bit
+ out |= !J1 << 13; // J1 bit
+ out |= !J2 << 11; // J2 bit
+ out |= (Value & 0x1FF800) << 5; // imm6 field
+ out |= (Value & 0x0007FF); // imm11 field
+
+ return swapHalfWords(out, IsLittleEndian);
+ }
+ case ARM::fixup_t2_condbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint64_t out = 0;
+ out |= (Value & 0x80000) << 7; // S bit
+ out |= (Value & 0x40000) >> 7; // J2 bit
+ out |= (Value & 0x20000) >> 4; // J1 bit
+ out |= (Value & 0x1F800) << 5; // imm6 field
+ out |= (Value & 0x007FF); // imm11 field
+
+ return swapHalfWords(out, IsLittleEndian);
+ }
+ case ARM::fixup_arm_thumb_bl: {
+ // The value doesn't encode the low bit (always zero) and is offset by
+ // four. The 32-bit immediate value is encoded as
+ // imm32 = SignExtend(S:I1:I2:imm10:imm11:0)
+ // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).
+ // The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
+ // J = either J1 or J2 bit
+ //
+ // BL: xxxxxSIIIIIIIIII xxJxJIIIIIIIIIII
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ uint32_t offset = (Value - 4) >> 1;
+ uint32_t signBit = (offset & 0x800000) >> 23;
+ uint32_t I1Bit = (offset & 0x400000) >> 22;
+ uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit;
+ uint32_t I2Bit = (offset & 0x200000) >> 21;
+ uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
+ uint32_t imm10Bits = (offset & 0x1FF800) >> 11;
+ uint32_t imm11Bits = (offset & 0x000007FF);
+
+ uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
+ uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ (uint16_t)imm11Bits);
+ return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
+ }
+ case ARM::fixup_arm_thumb_blx: {
+ // The value doesn't encode the low two bits (always zero) and is offset by
+ // four (see fixup_arm_thumb_cp). The 32-bit immediate value is encoded as
+ // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00)
+ // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).
+ // The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
+ // J = either J1 or J2 bit, 0 = zero.
+ //
+ // BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ uint32_t offset = (Value - 2) >> 2;
+ if (const MCSymbolRefExpr *SRE =
+ dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL)
+ offset = 0;
+ uint32_t signBit = (offset & 0x400000) >> 22;
+ uint32_t I1Bit = (offset & 0x200000) >> 21;
+ uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit;
+ uint32_t I2Bit = (offset & 0x100000) >> 20;
+ uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
+ uint32_t imm10HBits = (offset & 0xFFC00) >> 10;
+ uint32_t imm10LBits = (offset & 0x3FF);
+
+ uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
+ uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ ((uint16_t)imm10LBits) << 1);
+ return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
+ }
+ case ARM::fixup_thumb_adr_pcrel_10:
+ case ARM::fixup_arm_thumb_cp:
+ // On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we
+ // could have an error on our hands.
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ }
+ // Offset by 4, and don't encode the low two bits.
+ return ((Value - 4) >> 2) & 0xff;
+ case ARM::fixup_arm_thumb_cb: {
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ // FIXME: diagnose if no Thumb2
+ uint32_t Binary = (Value - 4) >> 1;
+ return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
+ }
+ case ARM::fixup_arm_thumb_br:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ }
+ return ((Value - 4) >> 1) & 0x7ff;
+ case ARM::fixup_arm_thumb_bcc:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ }
+ return ((Value - 4) >> 1) & 0xff;
+ case ARM::fixup_arm_pcrel_10_unscaled: {
+ Value = Value - 8; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
+ if (Ctx && Value >= 256) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
+ Value = (Value & 0xf) | ((Value & 0xf0) << 4);
+ return Value | (isAdd << 23);
+ }
+ case ARM::fixup_arm_pcrel_10:
+ Value = Value - 4; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ // Fall through.
+ case ARM::fixup_t2_pcrel_10: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value = Value - 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // These values don't encode the low two bits since they're always zero.
+ Value >>= 2;
+ if (Ctx && Value >= 256) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
+ // swapped.
+ if (Kind == ARM::fixup_t2_pcrel_10)
+ return swapHalfWords(Value, IsLittleEndian);
+
+ return Value;
+ }
+ }
+}
+
+void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup,
+ const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ const MCSymbolRefExpr *A = Target.getSymA();
+ const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
+ // Some fixups to thumb function symbols need the low bit (thumb bit)
+ // twiddled.
+ if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
+ if (Sym) {
+ if (Asm.isThumbFunc(Sym))
+ Value |= 1;
+ }
+ }
+ if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
+ assert(Sym && "How did we resolve this?");
+
+ // If the symbol is external the linker will handle it.
+ // FIXME: Should we handle it as an optimization?
+
+ // If the symbol is out of range, produce a relocation and hope the
+ // linker can handle it. GNU AS produces an error in this case.
+ if (Sym->isExternal() || Value >= 0x400004)
+ IsResolved = false;
+ }
+ // We must always generate a relocation for BL/BLX instructions if we have
+ // a symbol to reference, as the linker relies on knowing the destination
+ // symbol's thumb-ness to get interworking right.
+ if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
+ IsResolved = false;
+
+ // Try to get the encoded value for the fixup as-if we're mapping it into
+ // the instruction. This allows adjustFixupValue() to issue a diagnostic
+ // if the value aren't invalid.
+ (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(),
+ IsLittleEndian, IsResolved);
+}
+
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+
+ case FK_Data_1:
+ case ARM::fixup_arm_thumb_bcc:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return 1;
+
+ case FK_Data_2:
+ case ARM::fixup_arm_thumb_br:
+ case ARM::fixup_arm_thumb_cb:
+ return 2;
+
+ case ARM::fixup_arm_pcrel_10_unscaled:
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ return 3;
+
+ case FK_Data_4:
+ case ARM::fixup_t2_ldst_pcrel_12:
+ case ARM::fixup_t2_condbranch:
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_t2_pcrel_10:
+ case ARM::fixup_t2_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movw_lo16:
+ return 4;
+
+ case FK_SecRel_2:
+ return 2;
+ case FK_SecRel_4:
+ return 4;
+ }
+}
+
+/// getFixupKindContainerSizeBytes - The number of bytes of the
+/// container involved in big endian.
+static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+
+ case FK_Data_1:
+ return 1;
+ case FK_Data_2:
+ return 2;
+ case FK_Data_4:
+ return 4;
+
+ case ARM::fixup_arm_thumb_bcc:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_thumb_adr_pcrel_10:
+ case ARM::fixup_arm_thumb_br:
+ case ARM::fixup_arm_thumb_cb:
+ // Instruction size is 2 bytes.
+ return 2;
+
+ case ARM::fixup_arm_pcrel_10_unscaled:
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_t2_ldst_pcrel_12:
+ case ARM::fixup_t2_condbranch:
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_t2_pcrel_10:
+ case ARM::fixup_t2_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movw_lo16:
+ // Instruction size is 4 bytes.
+ return 4;
+ }
+}
+
+void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value,
+ bool IsPCRel) const {
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ Value =
+ adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian, true);
+ if (!Value)
+ return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // Used to point to big endian bytes.
+ unsigned FullSizeBytes;
+ if (!IsLittleEndian) {
+ FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind());
+ assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!");
+ assert(NumBytes <= FullSizeBytes && "Invalid fixup size!");
+ }
+
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittleEndian ? i : (FullSizeBytes - 1 - i);
+ Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
+ }
+}
+
+namespace CU {
+
+/// \brief Compact unwind encoding values.
+enum CompactUnwindEncodings {
+ UNWIND_ARM_MODE_MASK = 0x0F000000,
+ UNWIND_ARM_MODE_FRAME = 0x01000000,
+ UNWIND_ARM_MODE_FRAME_D = 0x02000000,
+ UNWIND_ARM_MODE_DWARF = 0x04000000,
+
+ UNWIND_ARM_FRAME_STACK_ADJUST_MASK = 0x00C00000,
+
+ UNWIND_ARM_FRAME_FIRST_PUSH_R4 = 0x00000001,
+ UNWIND_ARM_FRAME_FIRST_PUSH_R5 = 0x00000002,
+ UNWIND_ARM_FRAME_FIRST_PUSH_R6 = 0x00000004,
+
+ UNWIND_ARM_FRAME_SECOND_PUSH_R8 = 0x00000008,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R9 = 0x00000010,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R10 = 0x00000020,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R11 = 0x00000040,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R12 = 0x00000080,
+
+ UNWIND_ARM_FRAME_D_REG_COUNT_MASK = 0x00000F00,
+
+ UNWIND_ARM_DWARF_SECTION_OFFSET = 0x00FFFFFF
+};
+
+} // end CU namespace
+
+/// Generate compact unwind encoding for the function based on the CFI
+/// instructions. If the CFI instructions describe a frame that cannot be
+/// encoded in compact unwind, the method returns UNWIND_ARM_MODE_DWARF which
+/// tells the runtime to fallback and unwind using dwarf.
+uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
+ ArrayRef<MCCFIInstruction> Instrs) const {
+ DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "generateCU()\n");
+ // Only armv7k uses CFI based unwinding.
+ if (Subtype != MachO::CPU_SUBTYPE_ARM_V7K)
+ return 0;
+ // No .cfi directives means no frame.
+ if (Instrs.empty())
+ return 0;
+ // Start off assuming CFA is at SP+0.
+ int CFARegister = ARM::SP;
+ int CFARegisterOffset = 0;
+ // Mark savable registers as initially unsaved
+ DenseMap<unsigned, int> RegOffsets;
+ int FloatRegCount = 0;
+ // Process each .cfi directive and build up compact unwind info.
+ for (size_t i = 0, e = Instrs.size(); i != e; ++i) {
+ int Reg;
+ const MCCFIInstruction &Inst = Instrs[i];
+ switch (Inst.getOperation()) {
+ case MCCFIInstruction::OpDefCfa: // DW_CFA_def_cfa
+ CFARegisterOffset = -Inst.getOffset();
+ CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ break;
+ case MCCFIInstruction::OpDefCfaOffset: // DW_CFA_def_cfa_offset
+ CFARegisterOffset = -Inst.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfaRegister: // DW_CFA_def_cfa_register
+ CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ break;
+ case MCCFIInstruction::OpOffset: // DW_CFA_offset
+ Reg = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ RegOffsets[Reg] = Inst.getOffset();
+ else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
+ RegOffsets[Reg] = Inst.getOffset();
+ ++FloatRegCount;
+ } else {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << ".cfi_offset on unknown register="
+ << Inst.getRegister() << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ break;
+ case MCCFIInstruction::OpRelOffset: // DW_CFA_advance_loc
+ // Ignore
+ break;
+ default:
+ // Directive not convertable to compact unwind, bail out.
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs()
+ << "CFI directive not compatiable with comact "
+ "unwind encoding, opcode=" << Inst.getOperation()
+ << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ break;
+ }
+ }
+
+ // If no frame set up, return no unwind info.
+ if ((CFARegister == ARM::SP) && (CFARegisterOffset == 0))
+ return 0;
+
+ // Verify standard frame (lr/r7) was used.
+ if (CFARegister != ARM::R7) {
+ DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "frame register is "
+ << CFARegister
+ << " instead of r7\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ int StackAdjust = CFARegisterOffset - 8;
+ if (RegOffsets.lookup(ARM::LR) != (-4 - StackAdjust)) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs()
+ << "LR not saved as standard frame, StackAdjust="
+ << StackAdjust
+ << ", CFARegisterOffset=" << CFARegisterOffset
+ << ", lr save at offset=" << RegOffsets[14] << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ if (RegOffsets.lookup(ARM::R7) != (-8 - StackAdjust)) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << "r7 not saved as standard frame\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ uint32_t CompactUnwindEncoding = CU::UNWIND_ARM_MODE_FRAME;
+
+ // If var-args are used, there may be a stack adjust required.
+ switch (StackAdjust) {
+ case 0:
+ break;
+ case 4:
+ CompactUnwindEncoding |= 0x00400000;
+ break;
+ case 8:
+ CompactUnwindEncoding |= 0x00800000;
+ break;
+ case 12:
+ CompactUnwindEncoding |= 0x00C00000;
+ break;
+ default:
+ DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs()
+ << ".cfi_def_cfa stack adjust ("
+ << StackAdjust << ") out of range\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+
+ // If r6 is saved, it must be right below r7.
+ static struct {
+ unsigned Reg;
+ unsigned Encoding;
+ } GPRCSRegs[] = {{ARM::R6, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R6},
+ {ARM::R5, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R5},
+ {ARM::R4, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R4},
+ {ARM::R12, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R12},
+ {ARM::R11, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R11},
+ {ARM::R10, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R10},
+ {ARM::R9, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R9},
+ {ARM::R8, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R8}};
+
+ int CurOffset = -8 - StackAdjust;
+ for (auto CSReg : GPRCSRegs) {
+ auto Offset = RegOffsets.find(CSReg.Reg);
+ if (Offset == RegOffsets.end())
+ continue;
+
+ int RegOffset = Offset->second;
+ if (RegOffset != CurOffset - 4) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << MRI.getName(CSReg.Reg) << " saved at "
+ << RegOffset << " but only supported at "
+ << CurOffset << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ CompactUnwindEncoding |= CSReg.Encoding;
+ CurOffset -= 4;
+ }
+
+ // If no floats saved, we are done.
+ if (FloatRegCount == 0)
+ return CompactUnwindEncoding;
+
+ // Switch mode to include D register saving.
+ CompactUnwindEncoding &= ~CU::UNWIND_ARM_MODE_MASK;
+ CompactUnwindEncoding |= CU::UNWIND_ARM_MODE_FRAME_D;
+
+ // FIXME: supporting more than 4 saved D-registers compactly would be trivial,
+ // but needs coordination with the linker and libunwind.
+ if (FloatRegCount > 4) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << "unsupported number of D registers saved ("
+ << FloatRegCount << ")\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+
+ // Floating point registers must either be saved sequentially, or we defer to
+ // DWARF. No gaps allowed here so check that each saved d-register is
+ // precisely where it should be.
+ static unsigned FPRCSRegs[] = { ARM::D8, ARM::D10, ARM::D12, ARM::D14 };
+ for (int Idx = FloatRegCount - 1; Idx >= 0; --Idx) {
+ auto Offset = RegOffsets.find(FPRCSRegs[Idx]);
+ if (Offset == RegOffsets.end()) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << FloatRegCount << " D-regs saved, but "
+ << MRI.getName(FPRCSRegs[Idx])
+ << " not saved\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ } else if (Offset->second != CurOffset - 8) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << FloatRegCount << " D-regs saved, but "
+ << MRI.getName(FPRCSRegs[Idx])
+ << " saved at " << Offset->second
+ << ", expected at " << CurOffset - 8
+ << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ CurOffset -= 8;
+ }
+
+ return CompactUnwindEncoding | ((FloatRegCount - 1) << 8);
+}
+
+static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) {
+ unsigned AK = ARM::parseArch(Arch);
+ switch (AK) {
+ default:
+ return MachO::CPU_SUBTYPE_ARM_V7;
+ case ARM::AK_ARMV4T:
+ return MachO::CPU_SUBTYPE_ARM_V4T;
+ case ARM::AK_ARMV5T:
+ case ARM::AK_ARMV5TE:
+ case ARM::AK_ARMV5TEJ:
+ return MachO::CPU_SUBTYPE_ARM_V5;
+ case ARM::AK_ARMV6:
+ case ARM::AK_ARMV6K:
+ return MachO::CPU_SUBTYPE_ARM_V6;
+ case ARM::AK_ARMV7A:
+ return MachO::CPU_SUBTYPE_ARM_V7;
+ case ARM::AK_ARMV7S:
+ return MachO::CPU_SUBTYPE_ARM_V7S;
+ case ARM::AK_ARMV7K:
+ return MachO::CPU_SUBTYPE_ARM_V7K;
+ case ARM::AK_ARMV6M:
+ return MachO::CPU_SUBTYPE_ARM_V6M;
+ case ARM::AK_ARMV7M:
+ return MachO::CPU_SUBTYPE_ARM_V7M;
+ case ARM::AK_ARMV7EM:
+ return MachO::CPU_SUBTYPE_ARM_V7EM;
+ }
+}
+
+MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TheTriple, StringRef CPU,
+ bool isLittle) {
+ switch (TheTriple.getObjectFormat()) {
+ default:
+ llvm_unreachable("unsupported object format");
+ case Triple::MachO: {
+ MachO::CPUSubTypeARM CS = getMachOSubTypeFromArch(TheTriple.getArchName());
+ return new ARMAsmBackendDarwin(T, TheTriple, MRI, CS);
+ }
+ case Triple::COFF:
+ assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
+ return new ARMAsmBackendWinCOFF(T, TheTriple);
+ case Triple::ELF:
+ assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target");
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ARMAsmBackendELF(T, TheTriple, OSABI, isLittle);
+ }
+}
+
+MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU) {
+ return createARMAsmBackend(T, MRI, TT, CPU, true);
+}
+
+MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU) {
+ return createARMAsmBackend(T, MRI, TT, CPU, false);
+}
+
+MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU) {
+ return createARMAsmBackend(T, MRI, TT, CPU, true);
+}
+
+MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU) {
+ return createARMAsmBackend(T, MRI, TT, CPU, false);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
new file mode 100644
index 0000000..28a6213
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -0,0 +1,79 @@
+//===-- ARMAsmBackend.h - ARM Assembler Backend -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H
+#define LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H
+
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class ARMAsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo *STI;
+ bool isThumbMode; // Currently emitting Thumb code.
+ bool IsLittleEndian; // Big or little endian.
+public:
+ ARMAsmBackend(const Target &T, const Triple &TT, bool IsLittle)
+ : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
+ isThumbMode(TT.getArchName().startswith("thumb")),
+ IsLittleEndian(IsLittle) {}
+
+ ~ARMAsmBackend() override { delete STI; }
+
+ unsigned getNumFixupKinds() const override {
+ return ARM::NumTargetFixupKinds;
+ }
+
+ bool hasNOP() const { return STI->getFeatureBits()[ARM::HasV6T2Ops]; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
+
+ /// processFixupValue - Target hook to process the literal value of a fixup
+ /// if necessary.
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) override;
+
+ unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel,
+ MCContext *Ctx, bool IsLittleEndian,
+ bool IsResolved) const;
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override;
+
+ unsigned getRelaxedOpcode(unsigned Op) const;
+
+ bool mayNeedRelaxation(const MCInst &Inst) const override;
+
+ const char *reasonForFixupRelaxation(const MCFixup &Fixup,
+ uint64_t Value) const;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override;
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override;
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+
+ void handleAssemblerFlag(MCAssemblerFlag Flag) override;
+
+ unsigned getPointerSize() const { return 4; }
+ bool isThumb() const { return isThumbMode; }
+ void setIsThumb(bool it) { isThumbMode = it; }
+ bool isLittle() const { return IsLittleEndian; }
+};
+} // end anonymous namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
new file mode 100644
index 0000000..995dd0f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -0,0 +1,38 @@
+//===-- ARMAsmBackendDarwin.h ARM Asm Backend Darwin ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H
+#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H
+
+#include "llvm/Support/MachO.h"
+
+using namespace llvm;
+
+namespace {
+class ARMAsmBackendDarwin : public ARMAsmBackend {
+ const MCRegisterInfo &MRI;
+public:
+ const MachO::CPUSubTypeARM Subtype;
+ ARMAsmBackendDarwin(const Target &T, const Triple &TT,
+ const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st)
+ : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) {
+ HasDataInCodeSupport = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM,
+ Subtype);
+ }
+
+ uint32_t generateCompactUnwindEncoding(
+ ArrayRef<MCCFIInstruction> Instrs) const override;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
new file mode 100644
index 0000000..68b12ed
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -0,0 +1,28 @@
+//===-- ARMAsmBackendELF.h ARM Asm Backend ELF -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H
+#define LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H
+
+using namespace llvm;
+namespace {
+class ARMAsmBackendELF : public ARMAsmBackend {
+public:
+ uint8_t OSABI;
+ ARMAsmBackendELF(const Target &T, const Triple &TT, uint8_t OSABI,
+ bool IsLittle)
+ : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {}
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ return createARMELFObjectWriter(OS, OSABI, isLittle());
+ }
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
new file mode 100644
index 0000000..170f59a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -0,0 +1,26 @@
+//===-- ARMAsmBackendWinCOFF.h - ARM Asm Backend WinCOFF --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H
+#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H
+
+using namespace llvm;
+
+namespace {
+class ARMAsmBackendWinCOFF : public ARMAsmBackend {
+public:
+ ARMAsmBackendWinCOFF(const Target &T, const Triple &TheTriple)
+ : ARMAsmBackend(T, TheTriple, true) {}
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
+ }
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
new file mode 100644
index 0000000..4289a73
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -0,0 +1,464 @@
+//===-- ARMBaseInfo.h - Top level definitions for ARM -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the ARM target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMBASEINFO_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMBASEINFO_H
+
+#include "ARMMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+ enum CondCodes { // Meaning (integer) Meaning (floating-point)
+ EQ, // Equal Equal
+ NE, // Not equal Not equal, or unordered
+ HS, // Carry set >, ==, or unordered
+ LO, // Carry clear Less than
+ MI, // Minus, negative Less than
+ PL, // Plus, positive or zero >, ==, or unordered
+ VS, // Overflow Unordered
+ VC, // No overflow Not unordered
+ HI, // Unsigned higher Greater than, or unordered
+ LS, // Unsigned lower or same Less than or equal
+ GE, // Greater than or equal Greater than or equal
+ LT, // Less than Less than, or unordered
+ GT, // Greater than Greater than
+ LE, // Less than or equal <, ==, or unordered
+ AL // Always (unconditional) Always (unconditional)
+ };
+
+ inline static CondCodes getOppositeCondition(CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case HS: return LO;
+ case LO: return HS;
+ case MI: return PL;
+ case PL: return MI;
+ case VS: return VC;
+ case VC: return VS;
+ case HI: return LS;
+ case LS: return HI;
+ case GE: return LT;
+ case LT: return GE;
+ case GT: return LE;
+ case LE: return GT;
+ }
+ }
+} // namespace ARMCC
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+ switch (CC) {
+ case ARMCC::EQ: return "eq";
+ case ARMCC::NE: return "ne";
+ case ARMCC::HS: return "hs";
+ case ARMCC::LO: return "lo";
+ case ARMCC::MI: return "mi";
+ case ARMCC::PL: return "pl";
+ case ARMCC::VS: return "vs";
+ case ARMCC::VC: return "vc";
+ case ARMCC::HI: return "hi";
+ case ARMCC::LS: return "ls";
+ case ARMCC::GE: return "ge";
+ case ARMCC::LT: return "lt";
+ case ARMCC::GT: return "gt";
+ case ARMCC::LE: return "le";
+ case ARMCC::AL: return "al";
+ }
+ llvm_unreachable("Unknown condition code");
+}
+
+namespace ARM_PROC {
+ enum IMod {
+ IE = 2,
+ ID = 3
+ };
+
+ enum IFlags {
+ F = 1,
+ I = 2,
+ A = 4
+ };
+
+ inline static const char *IFlagsToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown iflags operand");
+ case F: return "f";
+ case I: return "i";
+ case A: return "a";
+ }
+ }
+
+ inline static const char *IModToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown imod operand");
+ case IE: return "ie";
+ case ID: return "id";
+ }
+ }
+}
+
+namespace ARM_MB {
+ // The Memory Barrier Option constants map directly to the 4-bit encoding of
+ // the option field for memory barrier operations.
+ enum MemBOpt {
+ RESERVED_0 = 0,
+ OSHLD = 1,
+ OSHST = 2,
+ OSH = 3,
+ RESERVED_4 = 4,
+ NSHLD = 5,
+ NSHST = 6,
+ NSH = 7,
+ RESERVED_8 = 8,
+ ISHLD = 9,
+ ISHST = 10,
+ ISH = 11,
+ RESERVED_12 = 12,
+ LD = 13,
+ ST = 14,
+ SY = 15
+ };
+
+ inline static const char *MemBOptToString(unsigned val, bool HasV8) {
+ switch (val) {
+ default: llvm_unreachable("Unknown memory operation");
+ case SY: return "sy";
+ case ST: return "st";
+ case LD: return HasV8 ? "ld" : "#0xd";
+ case RESERVED_12: return "#0xc";
+ case ISH: return "ish";
+ case ISHST: return "ishst";
+ case ISHLD: return HasV8 ? "ishld" : "#0x9";
+ case RESERVED_8: return "#0x8";
+ case NSH: return "nsh";
+ case NSHST: return "nshst";
+ case NSHLD: return HasV8 ? "nshld" : "#0x5";
+ case RESERVED_4: return "#0x4";
+ case OSH: return "osh";
+ case OSHST: return "oshst";
+ case OSHLD: return HasV8 ? "oshld" : "#0x1";
+ case RESERVED_0: return "#0x0";
+ }
+ }
+} // namespace ARM_MB
+
+namespace ARM_ISB {
+ enum InstSyncBOpt {
+ RESERVED_0 = 0,
+ RESERVED_1 = 1,
+ RESERVED_2 = 2,
+ RESERVED_3 = 3,
+ RESERVED_4 = 4,
+ RESERVED_5 = 5,
+ RESERVED_6 = 6,
+ RESERVED_7 = 7,
+ RESERVED_8 = 8,
+ RESERVED_9 = 9,
+ RESERVED_10 = 10,
+ RESERVED_11 = 11,
+ RESERVED_12 = 12,
+ RESERVED_13 = 13,
+ RESERVED_14 = 14,
+ SY = 15
+ };
+
+ inline static const char *InstSyncBOptToString(unsigned val) {
+ switch (val) {
+ default:
+ llvm_unreachable("Unknown memory operation");
+ case RESERVED_0: return "#0x0";
+ case RESERVED_1: return "#0x1";
+ case RESERVED_2: return "#0x2";
+ case RESERVED_3: return "#0x3";
+ case RESERVED_4: return "#0x4";
+ case RESERVED_5: return "#0x5";
+ case RESERVED_6: return "#0x6";
+ case RESERVED_7: return "#0x7";
+ case RESERVED_8: return "#0x8";
+ case RESERVED_9: return "#0x9";
+ case RESERVED_10: return "#0xa";
+ case RESERVED_11: return "#0xb";
+ case RESERVED_12: return "#0xc";
+ case RESERVED_13: return "#0xd";
+ case RESERVED_14: return "#0xe";
+ case SY: return "sy";
+ }
+ }
+} // namespace ARM_ISB
+
+/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
+///
+static inline bool isARMLowRegister(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+
+ /// ARM Index Modes
+ enum IndexMode {
+ IndexModeNone = 0,
+ IndexModePre = 1,
+ IndexModePost = 2,
+ IndexModeUpd = 3
+ };
+
+ /// ARM Addressing Modes
+ enum AddrMode {
+ AddrModeNone = 0,
+ AddrMode1 = 1,
+ AddrMode2 = 2,
+ AddrMode3 = 3,
+ AddrMode4 = 4,
+ AddrMode5 = 5,
+ AddrMode6 = 6,
+ AddrModeT1_1 = 7,
+ AddrModeT1_2 = 8,
+ AddrModeT1_4 = 9,
+ AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data
+ AddrModeT2_i12 = 11,
+ AddrModeT2_i8 = 12,
+ AddrModeT2_so = 13,
+ AddrModeT2_pc = 14, // +/- i12 for pc relative data
+ AddrModeT2_i8s4 = 15, // i8 * 4
+ AddrMode_i12 = 16
+ };
+
+ inline static const char *AddrModeToString(AddrMode addrmode) {
+ switch (addrmode) {
+ case AddrModeNone: return "AddrModeNone";
+ case AddrMode1: return "AddrMode1";
+ case AddrMode2: return "AddrMode2";
+ case AddrMode3: return "AddrMode3";
+ case AddrMode4: return "AddrMode4";
+ case AddrMode5: return "AddrMode5";
+ case AddrMode6: return "AddrMode6";
+ case AddrModeT1_1: return "AddrModeT1_1";
+ case AddrModeT1_2: return "AddrModeT1_2";
+ case AddrModeT1_4: return "AddrModeT1_4";
+ case AddrModeT1_s: return "AddrModeT1_s";
+ case AddrModeT2_i12: return "AddrModeT2_i12";
+ case AddrModeT2_i8: return "AddrModeT2_i8";
+ case AddrModeT2_so: return "AddrModeT2_so";
+ case AddrModeT2_pc: return "AddrModeT2_pc";
+ case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
+ case AddrMode_i12: return "AddrMode_i12";
+ }
+ }
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // ARM Specific MachineOperand flags.
+
+ MO_NO_FLAG = 0,
+
+ /// MO_LO16 - On a symbol operand, this represents a relocation containing
+ /// lower 16 bit of the address. Used only via movw instruction.
+ MO_LO16 = 0x1,
+
+ /// MO_HI16 - On a symbol operand, this represents a relocation containing
+ /// higher 16 bit of the address. Used only via movt instruction.
+ MO_HI16 = 0x2,
+
+ /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a
+ /// call operand.
+ MO_PLT = 0x3,
+
+ /// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects
+ /// just that part of the flag set.
+ MO_OPTION_MASK = 0x3f,
+
+ /// MO_DLLIMPORT - On a symbol operand, this represents that the reference
+ /// to the symbol is for an import stub. This is used for DLL import
+ /// storage class indication on Windows.
+ MO_DLLIMPORT = 0x40,
+
+ /// MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it
+ /// represents a symbol which, if indirect, will get special Darwin mangling
+ /// as a non-lazy-ptr indirect symbol (i.e. "L_FOO$non_lazy_ptr"). Can be
+ /// combined with MO_LO16, MO_HI16 or MO_NO_FLAG (in a constant-pool, for
+ /// example).
+ MO_NONLAZY = 0x80,
+
+ // It's undefined behaviour if an enum overflows the range between its
+ // smallest and largest values, but since these are |ed together, it can
+ // happen. Put a sentinel in (values of this enum are stored as "unsigned
+ // char").
+ MO_UNUSED_MAXIMUM = 0xff
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+ // This four-bit field describes the addressing mode used.
+ AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
+
+ // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
+ // and store ops only. Generic "updating" flag is used for ld/st multiple.
+ // The index mode enums are declared in ARMBaseInfo.h
+ IndexModeShift = 5,
+ IndexModeMask = 3 << IndexModeShift,
+
+ //===------------------------------------------------------------------===//
+ // Instruction encoding formats.
+ //
+ FormShift = 7,
+ FormMask = 0x3f << FormShift,
+
+ // Pseudo instructions
+ Pseudo = 0 << FormShift,
+
+ // Multiply instructions
+ MulFrm = 1 << FormShift,
+
+ // Branch instructions
+ BrFrm = 2 << FormShift,
+ BrMiscFrm = 3 << FormShift,
+
+ // Data Processing instructions
+ DPFrm = 4 << FormShift,
+ DPSoRegFrm = 5 << FormShift,
+
+ // Load and Store
+ LdFrm = 6 << FormShift,
+ StFrm = 7 << FormShift,
+ LdMiscFrm = 8 << FormShift,
+ StMiscFrm = 9 << FormShift,
+ LdStMulFrm = 10 << FormShift,
+
+ LdStExFrm = 11 << FormShift,
+
+ // Miscellaneous arithmetic instructions
+ ArithMiscFrm = 12 << FormShift,
+ SatFrm = 13 << FormShift,
+
+ // Extend instructions
+ ExtFrm = 14 << FormShift,
+
+ // VFP formats
+ VFPUnaryFrm = 15 << FormShift,
+ VFPBinaryFrm = 16 << FormShift,
+ VFPConv1Frm = 17 << FormShift,
+ VFPConv2Frm = 18 << FormShift,
+ VFPConv3Frm = 19 << FormShift,
+ VFPConv4Frm = 20 << FormShift,
+ VFPConv5Frm = 21 << FormShift,
+ VFPLdStFrm = 22 << FormShift,
+ VFPLdStMulFrm = 23 << FormShift,
+ VFPMiscFrm = 24 << FormShift,
+
+ // Thumb format
+ ThumbFrm = 25 << FormShift,
+
+ // Miscelleaneous format
+ MiscFrm = 26 << FormShift,
+
+ // NEON formats
+ NGetLnFrm = 27 << FormShift,
+ NSetLnFrm = 28 << FormShift,
+ NDupFrm = 29 << FormShift,
+ NLdStFrm = 30 << FormShift,
+ N1RegModImmFrm= 31 << FormShift,
+ N2RegFrm = 32 << FormShift,
+ NVCVTFrm = 33 << FormShift,
+ NVDupLnFrm = 34 << FormShift,
+ N2RegVShLFrm = 35 << FormShift,
+ N2RegVShRFrm = 36 << FormShift,
+ N3RegFrm = 37 << FormShift,
+ N3RegVShFrm = 38 << FormShift,
+ NVExtFrm = 39 << FormShift,
+ NVMulSLFrm = 40 << FormShift,
+ NVTBLFrm = 41 << FormShift,
+
+ //===------------------------------------------------------------------===//
+ // Misc flags.
+
+ // UnaryDP - Indicates this is a unary data processing instruction, i.e.
+ // it doesn't have a Rn operand.
+ UnaryDP = 1 << 13,
+
+ // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+ // a 16-bit Thumb instruction if certain conditions are met.
+ Xform16Bit = 1 << 14,
+
+ // ThumbArithFlagSetting - The instruction is a 16-bit flag setting Thumb
+ // instruction. Used by the parser to determine whether to require the 'S'
+ // suffix on the mnemonic (when not in an IT block) or preclude it (when
+ // in an IT block).
+ ThumbArithFlagSetting = 1 << 18,
+
+ //===------------------------------------------------------------------===//
+ // Code domain.
+ DomainShift = 15,
+ DomainMask = 7 << DomainShift,
+ DomainGeneral = 0 << DomainShift,
+ DomainVFP = 1 << DomainShift,
+ DomainNEON = 2 << DomainShift,
+ DomainNEONA8 = 4 << DomainShift,
+
+ //===------------------------------------------------------------------===//
+ // Field shifts - such shifts are used to set field while generating
+ // machine instructions.
+ //
+ // FIXME: This list will need adjusting/fixing as the MC code emitter
+ // takes shape and the ARMCodeEmitter.cpp bits go away.
+ ShiftTypeShift = 4,
+
+ M_BitShift = 5,
+ ShiftImmShift = 5,
+ ShiftShift = 7,
+ N_BitShift = 7,
+ ImmHiShift = 8,
+ SoRotImmShift = 8,
+ RegRsShift = 8,
+ ExtRotImmShift = 10,
+ RegRdLoShift = 12,
+ RegRdShift = 12,
+ RegRdHiShift = 16,
+ RegRnShift = 16,
+ S_BitShift = 20,
+ W_BitShift = 21,
+ AM3_I_BitShift = 22,
+ D_BitShift = 22,
+ U_BitShift = 23,
+ P_BitShift = 24,
+ I_BitShift = 25,
+ CondShift = 28
+ };
+
+} // end namespace ARMII
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
new file mode 100644
index 0000000..52eba8be
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -0,0 +1,259 @@
+//===-- ARMELFObjectWriter.cpp - ARM ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+ enum { DefaultEABIVersion = 0x05000000U };
+ unsigned GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
+
+ public:
+ ARMELFObjectWriter(uint8_t OSABI);
+
+ ~ARMELFObjectWriter() override;
+
+ unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel) const override;
+
+ bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ unsigned Type) const override;
+ };
+}
+
+ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI,
+ ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
+
+ARMELFObjectWriter::~ARMELFObjectWriter() {}
+
+bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+ unsigned Type) const {
+ // FIXME: This is extremely conservative. This really needs to use a
+ // whitelist with a clear explanation for why each realocation needs to
+ // point to the symbol, not to the section.
+ switch (Type) {
+ default:
+ return true;
+
+ case ELF::R_ARM_PREL31:
+ case ELF::R_ARM_ABS32:
+ return false;
+ }
+}
+
+// Need to examine the Fixup when determining whether to
+// emit the relocation as an explicit symbol or as a section relative
+// offset
+unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ return GetRelocTypeInner(Target, Fixup, IsPCRel);
+}
+
+unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
+
+ unsigned Type = 0;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ report_fatal_error("unsupported relocation on symbol");
+ return ELF::R_ARM_NONE;
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_REL32;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ llvm_unreachable("unimplemented");
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOT_PREL:
+ Type = ELF::R_ARM_GOT_PREL;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_blx:
+ case ARM::fixup_arm_uncondbl:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_ARM_CALL;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSCALL:
+ Type = ELF::R_ARM_TLS_CALL;
+ break;
+ default:
+ Type = ELF::R_ARM_CALL;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_t2_condbranch:
+ case ARM::fixup_t2_uncondbranch:
+ Type = ELF::R_ARM_THM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ Type = ELF::R_ARM_MOVT_PREL;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ Type = ELF::R_ARM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ Type = ELF::R_ARM_THM_MOVT_PREL;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ Type = ELF::R_ARM_THM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_ARM_TLSCALL:
+ Type = ELF::R_ARM_THM_TLS_CALL;
+ break;
+ default:
+ Type = ELF::R_ARM_THM_CALL;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ report_fatal_error("unsupported relocation on symbol");
+ return ELF::R_ARM_NONE;
+ case FK_Data_1:
+ switch (Modifier) {
+ default: llvm_unreachable("unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS8;
+ break;
+ }
+ break;
+ case FK_Data_2:
+ switch (Modifier) {
+ default: llvm_unreachable("unsupported modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS16;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_ARM_NONE:
+ Type = ELF::R_ARM_NONE;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_ARM_GOT_BREL;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_ARM_TLS_GD32;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_ARM_TLS_LE32;
+ break;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS32;
+ break;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ Type = ELF::R_ARM_GOTOFF32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOT_PREL:
+ Type = ELF::R_ARM_GOT_PREL;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TARGET1:
+ Type = ELF::R_ARM_TARGET1;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TARGET2:
+ Type = ELF::R_ARM_TARGET2;
+ break;
+ case MCSymbolRefExpr::VK_ARM_PREL31:
+ Type = ELF::R_ARM_PREL31;
+ break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+ Type = ELF::R_ARM_SBREL32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSLDO:
+ Type = ELF::R_ARM_TLS_LDO32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSCALL:
+ Type = ELF::R_ARM_TLS_CALL;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSDESC:
+ Type = ELF::R_ARM_TLS_GOTDESC;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSDESCSEQ:
+ Type = ELF::R_ARM_TLS_DESCSEQ;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_arm_thumb_br:
+ llvm_unreachable("Unimplemented");
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ Type = ELF::R_ARM_MOVT_ABS;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ Type = ELF::R_ARM_MOVW_ABS_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ Type = ELF::R_ARM_THM_MOVT_ABS;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ break;
+ }
+ }
+
+ return Type;
+}
+
+MCObjectWriter *llvm::createARMELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI,
+ bool IsLittleEndian) {
+ MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, IsLittleEndian);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
new file mode 100644
index 0000000..6084f22
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -0,0 +1,1377 @@
+//===- lib/MC/ARMELFStreamer.cpp - ELF Object Output for ARM --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ARM ELF .o object files. Different
+// from generic ELF streamer in emitting mapping symbols ($a, $t and $d) to
+// delimit regions of data and code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMRegisterInfo.h"
+#include "ARMUnwindOpAsm.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/ARMEHABI.h"
+#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+static std::string GetAEABIUnwindPersonalityName(unsigned Index) {
+ assert(Index < ARM::EHABI::NUM_PERSONALITY_INDEX &&
+ "Invalid personality index");
+ return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str();
+}
+
+namespace {
+
+class ARMELFStreamer;
+
+class ARMTargetAsmStreamer : public ARMTargetStreamer {
+ formatted_raw_ostream &OS;
+ MCInstPrinter &InstPrinter;
+ bool IsVerboseAsm;
+
+ void emitFnStart() override;
+ void emitFnEnd() override;
+ void emitCantUnwind() override;
+ void emitPersonality(const MCSymbol *Personality) override;
+ void emitPersonalityIndex(unsigned Index) override;
+ void emitHandlerData() override;
+ void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0) override;
+ void emitMovSP(unsigned Reg, int64_t Offset = 0) override;
+ void emitPad(int64_t Offset) override;
+ void emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) override;
+ void emitUnwindRaw(int64_t Offset,
+ const SmallVectorImpl<uint8_t> &Opcodes) override;
+
+ void switchVendor(StringRef Vendor) override;
+ void emitAttribute(unsigned Attribute, unsigned Value) override;
+ void emitTextAttribute(unsigned Attribute, StringRef String) override;
+ void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
+ StringRef StringValue) override;
+ void emitArch(unsigned Arch) override;
+ void emitArchExtension(unsigned ArchExt) override;
+ void emitObjectArch(unsigned Arch) override;
+ void emitFPU(unsigned FPU) override;
+ void emitInst(uint32_t Inst, char Suffix = '\0') override;
+ void finishAttributeSection() override;
+
+ void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
+ void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override;
+
+public:
+ ARMTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS,
+ MCInstPrinter &InstPrinter, bool VerboseAsm);
+};
+
+ARMTargetAsmStreamer::ARMTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter &InstPrinter,
+ bool VerboseAsm)
+ : ARMTargetStreamer(S), OS(OS), InstPrinter(InstPrinter),
+ IsVerboseAsm(VerboseAsm) {}
+void ARMTargetAsmStreamer::emitFnStart() { OS << "\t.fnstart\n"; }
+void ARMTargetAsmStreamer::emitFnEnd() { OS << "\t.fnend\n"; }
+void ARMTargetAsmStreamer::emitCantUnwind() { OS << "\t.cantunwind\n"; }
+void ARMTargetAsmStreamer::emitPersonality(const MCSymbol *Personality) {
+ OS << "\t.personality " << Personality->getName() << '\n';
+}
+void ARMTargetAsmStreamer::emitPersonalityIndex(unsigned Index) {
+ OS << "\t.personalityindex " << Index << '\n';
+}
+void ARMTargetAsmStreamer::emitHandlerData() { OS << "\t.handlerdata\n"; }
+void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
+ int64_t Offset) {
+ OS << "\t.setfp\t";
+ InstPrinter.printRegName(OS, FpReg);
+ OS << ", ";
+ InstPrinter.printRegName(OS, SpReg);
+ if (Offset)
+ OS << ", #" << Offset;
+ OS << '\n';
+}
+void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
+ assert((Reg != ARM::SP && Reg != ARM::PC) &&
+ "the operand of .movsp cannot be either sp or pc");
+
+ OS << "\t.movsp\t";
+ InstPrinter.printRegName(OS, Reg);
+ if (Offset)
+ OS << ", #" << Offset;
+ OS << '\n';
+}
+void ARMTargetAsmStreamer::emitPad(int64_t Offset) {
+ OS << "\t.pad\t#" << Offset << '\n';
+}
+void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {
+ assert(RegList.size() && "RegList should not be empty");
+ if (isVector)
+ OS << "\t.vsave\t{";
+ else
+ OS << "\t.save\t{";
+
+ InstPrinter.printRegName(OS, RegList[0]);
+
+ for (unsigned i = 1, e = RegList.size(); i != e; ++i) {
+ OS << ", ";
+ InstPrinter.printRegName(OS, RegList[i]);
+ }
+
+ OS << "}\n";
+}
+void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {
+}
+void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+ OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value);
+ if (IsVerboseAsm) {
+ StringRef Name = ARMBuildAttrs::AttrTypeAsString(Attribute);
+ if (!Name.empty())
+ OS << "\t@ " << Name;
+ }
+ OS << "\n";
+}
+void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef String) {
+ switch (Attribute) {
+ case ARMBuildAttrs::CPU_name:
+ OS << "\t.cpu\t" << String.lower();
+ break;
+ default:
+ OS << "\t.eabi_attribute\t" << Attribute << ", \"" << String << "\"";
+ if (IsVerboseAsm) {
+ StringRef Name = ARMBuildAttrs::AttrTypeAsString(Attribute);
+ if (!Name.empty())
+ OS << "\t@ " << Name;
+ }
+ break;
+ }
+ OS << "\n";
+}
+void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
+ unsigned IntValue,
+ StringRef StringValue) {
+ switch (Attribute) {
+ default: llvm_unreachable("unsupported multi-value attribute in asm mode");
+ case ARMBuildAttrs::compatibility:
+ OS << "\t.eabi_attribute\t" << Attribute << ", " << IntValue;
+ if (!StringValue.empty())
+ OS << ", \"" << StringValue << "\"";
+ if (IsVerboseAsm)
+ OS << "\t@ " << ARMBuildAttrs::AttrTypeAsString(Attribute);
+ break;
+ }
+ OS << "\n";
+}
+void ARMTargetAsmStreamer::emitArch(unsigned Arch) {
+ OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n";
+}
+void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) {
+ OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n";
+}
+void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) {
+ OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n';
+}
+void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
+ OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n";
+}
+void ARMTargetAsmStreamer::finishAttributeSection() {
+}
+void
+ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
+ OS << "\t.tlsdescseq\t" << S->getSymbol().getName();
+}
+
+void ARMTargetAsmStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
+
+ OS << "\t.thumb_set\t";
+ Symbol->print(OS, MAI);
+ OS << ", ";
+ Value->print(OS, MAI);
+ OS << '\n';
+}
+
+void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) {
+ OS << "\t.inst";
+ if (Suffix)
+ OS << "." << Suffix;
+ OS << "\t0x" << Twine::utohexstr(Inst) << "\n";
+}
+
+void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset,
+ const SmallVectorImpl<uint8_t> &Opcodes) {
+ OS << "\t.unwind_raw " << Offset;
+ for (SmallVectorImpl<uint8_t>::const_iterator OCI = Opcodes.begin(),
+ OCE = Opcodes.end();
+ OCI != OCE; ++OCI)
+ OS << ", 0x" << Twine::utohexstr(*OCI);
+ OS << '\n';
+}
+
+class ARMTargetELFStreamer : public ARMTargetStreamer {
+private:
+ // This structure holds all attributes, accounting for
+ // their string/numeric value, so we can later emit them
+ // in declaration order, keeping all in the same vector
+ struct AttributeItem {
+ enum {
+ HiddenAttribute = 0,
+ NumericAttribute,
+ TextAttribute,
+ NumericAndTextAttributes
+ } Type;
+ unsigned Tag;
+ unsigned IntValue;
+ std::string StringValue;
+
+ static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) {
+ // The conformance tag must be emitted first when serialised
+ // into an object file. Specifically, the addenda to the ARM ABI
+ // states that (2.3.7.4):
+ //
+ // "To simplify recognition by consumers in the common case of
+ // claiming conformity for the whole file, this tag should be
+ // emitted first in a file-scope sub-subsection of the first
+ // public subsection of the attributes section."
+ //
+ // So it is special-cased in this comparison predicate when the
+ // attributes are sorted in finishAttributeSection().
+ return (RHS.Tag != ARMBuildAttrs::conformance) &&
+ ((LHS.Tag == ARMBuildAttrs::conformance) || (LHS.Tag < RHS.Tag));
+ }
+ };
+
+ StringRef CurrentVendor;
+ unsigned FPU;
+ unsigned Arch;
+ unsigned EmittedArch;
+ SmallVector<AttributeItem, 64> Contents;
+
+ MCSection *AttributeSection;
+
+ AttributeItem *getAttributeItem(unsigned Attribute) {
+ for (size_t i = 0; i < Contents.size(); ++i)
+ if (Contents[i].Tag == Attribute)
+ return &Contents[i];
+ return nullptr;
+ }
+
+ void setAttributeItem(unsigned Attribute, unsigned Value,
+ bool OverwriteExisting) {
+ // Look for existing attribute item
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->Type = AttributeItem::NumericAttribute;
+ Item->IntValue = Value;
+ return;
+ }
+
+ // Create new attribute item
+ AttributeItem Item = {
+ AttributeItem::NumericAttribute,
+ Attribute,
+ Value,
+ StringRef("")
+ };
+ Contents.push_back(Item);
+ }
+
+ void setAttributeItem(unsigned Attribute, StringRef Value,
+ bool OverwriteExisting) {
+ // Look for existing attribute item
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->Type = AttributeItem::TextAttribute;
+ Item->StringValue = Value;
+ return;
+ }
+
+ // Create new attribute item
+ AttributeItem Item = {
+ AttributeItem::TextAttribute,
+ Attribute,
+ 0,
+ Value
+ };
+ Contents.push_back(Item);
+ }
+
+ void setAttributeItems(unsigned Attribute, unsigned IntValue,
+ StringRef StringValue, bool OverwriteExisting) {
+ // Look for existing attribute item
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->Type = AttributeItem::NumericAndTextAttributes;
+ Item->IntValue = IntValue;
+ Item->StringValue = StringValue;
+ return;
+ }
+
+ // Create new attribute item
+ AttributeItem Item = {
+ AttributeItem::NumericAndTextAttributes,
+ Attribute,
+ IntValue,
+ StringValue
+ };
+ Contents.push_back(Item);
+ }
+
+ void emitArchDefaultAttributes();
+ void emitFPUDefaultAttributes();
+
+ ARMELFStreamer &getStreamer();
+
+ void emitFnStart() override;
+ void emitFnEnd() override;
+ void emitCantUnwind() override;
+ void emitPersonality(const MCSymbol *Personality) override;
+ void emitPersonalityIndex(unsigned Index) override;
+ void emitHandlerData() override;
+ void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0) override;
+ void emitMovSP(unsigned Reg, int64_t Offset = 0) override;
+ void emitPad(int64_t Offset) override;
+ void emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) override;
+ void emitUnwindRaw(int64_t Offset,
+ const SmallVectorImpl<uint8_t> &Opcodes) override;
+
+ void switchVendor(StringRef Vendor) override;
+ void emitAttribute(unsigned Attribute, unsigned Value) override;
+ void emitTextAttribute(unsigned Attribute, StringRef String) override;
+ void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
+ StringRef StringValue) override;
+ void emitArch(unsigned Arch) override;
+ void emitObjectArch(unsigned Arch) override;
+ void emitFPU(unsigned FPU) override;
+ void emitInst(uint32_t Inst, char Suffix = '\0') override;
+ void finishAttributeSection() override;
+ void emitLabel(MCSymbol *Symbol) override;
+
+ void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
+ void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override;
+
+ size_t calculateContentSize() const;
+
+public:
+ ARMTargetELFStreamer(MCStreamer &S)
+ : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID),
+ Arch(ARM::AK_INVALID), EmittedArch(ARM::AK_INVALID),
+ AttributeSection(nullptr) {}
+};
+
+/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
+/// the appropriate points in the object files. These symbols are defined in the
+/// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf.
+///
+/// In brief: $a, $t or $d should be emitted at the start of each contiguous
+/// region of ARM code, Thumb code or data in a section. In practice, this
+/// emission does not rely on explicit assembler directives but on inherent
+/// properties of the directives doing the emission (e.g. ".byte" is data, "add
+/// r0, r0, r0" an instruction).
+///
+/// As a result this system is orthogonal to the DataRegion infrastructure used
+/// by MachO. Beware!
+class ARMELFStreamer : public MCELFStreamer {
+public:
+ friend class ARMTargetELFStreamer;
+
+ ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool IsThumb)
+ : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb),
+ MappingSymbolCounter(0), LastEMS(EMS_None) {
+ Reset();
+ }
+
+ ~ARMELFStreamer() {}
+
+ void FinishImpl() override;
+
+ // ARM exception handling directives
+ void emitFnStart();
+ void emitFnEnd();
+ void emitCantUnwind();
+ void emitPersonality(const MCSymbol *Per);
+ void emitPersonalityIndex(unsigned index);
+ void emitHandlerData();
+ void emitSetFP(unsigned NewFpReg, unsigned NewSpReg, int64_t Offset = 0);
+ void emitMovSP(unsigned Reg, int64_t Offset = 0);
+ void emitPad(int64_t Offset);
+ void emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector);
+ void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes);
+
+ void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
+ // We have to keep track of the mapping symbol state of any sections we
+ // use. Each one should start off as EMS_None, which is provided as the
+ // default constructor by DenseMap::lookup.
+ LastMappingSymbols[getPreviousSection().first] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::ChangeSection(Section, Subsection);
+ }
+
+ /// This function is the one used to emit instruction data into the ELF
+ /// streamer. We override it to add the appropriate mapping symbol if
+ /// necessary.
+ void EmitInstruction(const MCInst& Inst,
+ const MCSubtargetInfo &STI) override {
+ if (IsThumb)
+ EmitThumbMappingSymbol();
+ else
+ EmitARMMappingSymbol();
+
+ MCELFStreamer::EmitInstruction(Inst, STI);
+ }
+
+ void emitInst(uint32_t Inst, char Suffix) {
+ unsigned Size;
+ char Buffer[4];
+ const bool LittleEndian = getContext().getAsmInfo()->isLittleEndian();
+
+ switch (Suffix) {
+ case '\0':
+ Size = 4;
+
+ assert(!IsThumb);
+ EmitARMMappingSymbol();
+ for (unsigned II = 0, IE = Size; II != IE; II++) {
+ const unsigned I = LittleEndian ? (Size - II - 1) : II;
+ Buffer[Size - II - 1] = uint8_t(Inst >> I * CHAR_BIT);
+ }
+
+ break;
+ case 'n':
+ case 'w':
+ Size = (Suffix == 'n' ? 2 : 4);
+
+ assert(IsThumb);
+ EmitThumbMappingSymbol();
+ for (unsigned II = 0, IE = Size; II != IE; II = II + 2) {
+ const unsigned I0 = LittleEndian ? II + 0 : (Size - II - 1);
+ const unsigned I1 = LittleEndian ? II + 1 : (Size - II - 2);
+ Buffer[Size - II - 2] = uint8_t(Inst >> I0 * CHAR_BIT);
+ Buffer[Size - II - 1] = uint8_t(Inst >> I1 * CHAR_BIT);
+ }
+
+ break;
+ default:
+ llvm_unreachable("Invalid Suffix");
+ }
+
+ MCELFStreamer::EmitBytes(StringRef(Buffer, Size));
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
+ /// necessary.
+ void EmitBytes(StringRef Data) override {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitBytes(Data);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
+ /// necessary.
+ void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
+ if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) {
+ getContext().reportError(Loc, "relocated expression must be 32-bit");
+ return;
+ }
+
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitValueImpl(Value, Size, Loc);
+ }
+
+ void EmitAssemblerFlag(MCAssemblerFlag Flag) override {
+ MCELFStreamer::EmitAssemblerFlag(Flag);
+
+ switch (Flag) {
+ case MCAF_SyntaxUnified:
+ return; // no-op here.
+ case MCAF_Code16:
+ IsThumb = true;
+ return; // Change to Thumb mode
+ case MCAF_Code32:
+ IsThumb = false;
+ return; // Change to ARM mode
+ case MCAF_Code64:
+ return;
+ case MCAF_SubsectionsViaSymbols:
+ return;
+ }
+ }
+
+private:
+ enum ElfMappingSymbol {
+ EMS_None,
+ EMS_ARM,
+ EMS_Thumb,
+ EMS_Data
+ };
+
+ void EmitDataMappingSymbol() {
+ if (LastEMS == EMS_Data) return;
+ EmitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+ }
+
+ void EmitThumbMappingSymbol() {
+ if (LastEMS == EMS_Thumb) return;
+ EmitMappingSymbol("$t");
+ LastEMS = EMS_Thumb;
+ }
+
+ void EmitARMMappingSymbol() {
+ if (LastEMS == EMS_ARM) return;
+ EmitMappingSymbol("$a");
+ LastEMS = EMS_ARM;
+ }
+
+ void EmitMappingSymbol(StringRef Name) {
+ auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
+ Name + "." + Twine(MappingSymbolCounter++)));
+ EmitLabel(Symbol);
+
+ Symbol->setType(ELF::STT_NOTYPE);
+ Symbol->setBinding(ELF::STB_LOCAL);
+ Symbol->setExternal(false);
+ }
+
+ void EmitThumbFunc(MCSymbol *Func) override {
+ getAssembler().setIsThumbFunc(Func);
+ EmitSymbolAttribute(Func, MCSA_ELF_TypeFunction);
+ }
+
+ // Helper functions for ARM exception handling directives
+ void Reset();
+
+ void EmitPersonalityFixup(StringRef Name);
+ void FlushPendingOffset();
+ void FlushUnwindOpcodes(bool NoHandlerData);
+
+ void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
+ SectionKind Kind, const MCSymbol &Fn);
+ void SwitchToExTabSection(const MCSymbol &FnStart);
+ void SwitchToExIdxSection(const MCSymbol &FnStart);
+
+ void EmitFixup(const MCExpr *Expr, MCFixupKind Kind);
+
+ bool IsThumb;
+ int64_t MappingSymbolCounter;
+
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS;
+
+ // ARM Exception Handling Frame Information
+ MCSymbol *ExTab;
+ MCSymbol *FnStart;
+ const MCSymbol *Personality;
+ unsigned PersonalityIndex;
+ unsigned FPReg; // Frame pointer register
+ int64_t FPOffset; // Offset: (final frame pointer) - (initial $sp)
+ int64_t SPOffset; // Offset: (final $sp) - (initial $sp)
+ int64_t PendingOffset; // Offset: (final $sp) - (emitted $sp)
+ bool UsedFP;
+ bool CantUnwind;
+ SmallVector<uint8_t, 64> Opcodes;
+ UnwindOpcodeAssembler UnwindOpAsm;
+};
+} // end anonymous namespace
+
+ARMELFStreamer &ARMTargetELFStreamer::getStreamer() {
+ return static_cast<ARMELFStreamer &>(Streamer);
+}
+
+void ARMTargetELFStreamer::emitFnStart() { getStreamer().emitFnStart(); }
+void ARMTargetELFStreamer::emitFnEnd() { getStreamer().emitFnEnd(); }
+void ARMTargetELFStreamer::emitCantUnwind() { getStreamer().emitCantUnwind(); }
+void ARMTargetELFStreamer::emitPersonality(const MCSymbol *Personality) {
+ getStreamer().emitPersonality(Personality);
+}
+void ARMTargetELFStreamer::emitPersonalityIndex(unsigned Index) {
+ getStreamer().emitPersonalityIndex(Index);
+}
+void ARMTargetELFStreamer::emitHandlerData() {
+ getStreamer().emitHandlerData();
+}
+void ARMTargetELFStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
+ int64_t Offset) {
+ getStreamer().emitSetFP(FpReg, SpReg, Offset);
+}
+void ARMTargetELFStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
+ getStreamer().emitMovSP(Reg, Offset);
+}
+void ARMTargetELFStreamer::emitPad(int64_t Offset) {
+ getStreamer().emitPad(Offset);
+}
+void ARMTargetELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {
+ getStreamer().emitRegSave(RegList, isVector);
+}
+void ARMTargetELFStreamer::emitUnwindRaw(int64_t Offset,
+ const SmallVectorImpl<uint8_t> &Opcodes) {
+ getStreamer().emitUnwindRaw(Offset, Opcodes);
+}
+void ARMTargetELFStreamer::switchVendor(StringRef Vendor) {
+ assert(!Vendor.empty() && "Vendor cannot be empty.");
+
+ if (CurrentVendor == Vendor)
+ return;
+
+ if (!CurrentVendor.empty())
+ finishAttributeSection();
+
+ assert(Contents.empty() &&
+ ".ARM.attributes should be flushed before changing vendor");
+ CurrentVendor = Vendor;
+
+}
+void ARMTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+ setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
+}
+void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef Value) {
+ setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
+}
+void ARMTargetELFStreamer::emitIntTextAttribute(unsigned Attribute,
+ unsigned IntValue,
+ StringRef StringValue) {
+ setAttributeItems(Attribute, IntValue, StringValue,
+ /* OverwriteExisting= */ true);
+}
+void ARMTargetELFStreamer::emitArch(unsigned Value) {
+ Arch = Value;
+}
+void ARMTargetELFStreamer::emitObjectArch(unsigned Value) {
+ EmittedArch = Value;
+}
+void ARMTargetELFStreamer::emitArchDefaultAttributes() {
+ using namespace ARMBuildAttrs;
+
+ setAttributeItem(CPU_name,
+ ARM::getCPUAttr(Arch),
+ false);
+
+ if (EmittedArch == ARM::AK_INVALID)
+ setAttributeItem(CPU_arch,
+ ARM::getArchAttr(Arch),
+ false);
+ else
+ setAttributeItem(CPU_arch,
+ ARM::getArchAttr(EmittedArch),
+ false);
+
+ switch (Arch) {
+ case ARM::AK_ARMV2:
+ case ARM::AK_ARMV2A:
+ case ARM::AK_ARMV3:
+ case ARM::AK_ARMV3M:
+ case ARM::AK_ARMV4:
+ setAttributeItem(ARM_ISA_use, Allowed, false);
+ break;
+
+ case ARM::AK_ARMV4T:
+ case ARM::AK_ARMV5T:
+ case ARM::AK_ARMV5TE:
+ case ARM::AK_ARMV6:
+ setAttributeItem(ARM_ISA_use, Allowed, false);
+ setAttributeItem(THUMB_ISA_use, Allowed, false);
+ break;
+
+ case ARM::AK_ARMV6T2:
+ setAttributeItem(ARM_ISA_use, Allowed, false);
+ setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
+ break;
+
+ case ARM::AK_ARMV6K:
+ case ARM::AK_ARMV6KZ:
+ setAttributeItem(ARM_ISA_use, Allowed, false);
+ setAttributeItem(THUMB_ISA_use, Allowed, false);
+ setAttributeItem(Virtualization_use, AllowTZ, false);
+ break;
+
+ case ARM::AK_ARMV6M:
+ setAttributeItem(THUMB_ISA_use, Allowed, false);
+ break;
+
+ case ARM::AK_ARMV7A:
+ setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
+ setAttributeItem(ARM_ISA_use, Allowed, false);
+ setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
+ break;
+
+ case ARM::AK_ARMV7R:
+ setAttributeItem(CPU_arch_profile, RealTimeProfile, false);
+ setAttributeItem(ARM_ISA_use, Allowed, false);
+ setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
+ break;
+
+ case ARM::AK_ARMV7M:
+ setAttributeItem(CPU_arch_profile, MicroControllerProfile, false);
+ setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
+ break;
+
+ case ARM::AK_ARMV8A:
+ case ARM::AK_ARMV8_1A:
+ case ARM::AK_ARMV8_2A:
+ setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
+ setAttributeItem(ARM_ISA_use, Allowed, false);
+ setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
+ setAttributeItem(MPextension_use, Allowed, false);
+ setAttributeItem(Virtualization_use, AllowTZVirtualization, false);
+ break;
+
+ case ARM::AK_IWMMXT:
+ setAttributeItem(ARM_ISA_use, Allowed, false);
+ setAttributeItem(THUMB_ISA_use, Allowed, false);
+ setAttributeItem(WMMX_arch, AllowWMMXv1, false);
+ break;
+
+ case ARM::AK_IWMMXT2:
+ setAttributeItem(ARM_ISA_use, Allowed, false);
+ setAttributeItem(THUMB_ISA_use, Allowed, false);
+ setAttributeItem(WMMX_arch, AllowWMMXv2, false);
+ break;
+
+ default:
+ report_fatal_error("Unknown Arch: " + Twine(Arch));
+ break;
+ }
+}
+void ARMTargetELFStreamer::emitFPU(unsigned Value) {
+ FPU = Value;
+}
+void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
+ switch (FPU) {
+ case ARM::FK_VFP:
+ case ARM::FK_VFPV2:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv2,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FK_VFPV3:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3A,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FK_VFPV3_FP16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+ ARMBuildAttrs::AllowHPFP,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FK_VFPV3_D16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3B,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FK_VFPV3_D16_FP16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3B,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+ ARMBuildAttrs::AllowHPFP,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FK_VFPV3XD:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3B,
+ /* OverwriteExisting= */ false);
+ break;
+ case ARM::FK_VFPV3XD_FP16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3B,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+ ARMBuildAttrs::AllowHPFP,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FK_VFPV4:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv4A,
+ /* OverwriteExisting= */ false);
+ break;
+
+ // ABI_HardFP_use is handled in ARMAsmPrinter, so _SP_D16 is treated the same
+ // as _D16 here.
+ case ARM::FK_FPV4_SP_D16:
+ case ARM::FK_VFPV4_D16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv4B,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FK_FP_ARMV8:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPARMv8A,
+ /* OverwriteExisting= */ false);
+ break;
+
+ // FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so
+ // uses the FP_ARMV8_D16 build attribute.
+ case ARM::FK_FPV5_SP_D16:
+ case ARM::FK_FPV5_D16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPARMv8B,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FK_NEON:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeon,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FK_NEON_FP16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeon,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+ ARMBuildAttrs::AllowHPFP,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FK_NEON_VFPV4:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv4A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeon2,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FK_NEON_FP_ARMV8:
+ case ARM::FK_CRYPTO_NEON_FP_ARMV8:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPARMv8A,
+ /* OverwriteExisting= */ false);
+ // 'Advanced_SIMD_arch' must be emitted not here, but within
+ // ARMAsmPrinter::emitAttributes(), depending on hasV8Ops() and hasV8_1a()
+ break;
+
+ case ARM::FK_SOFTVFP:
+ case ARM::FK_NONE:
+ break;
+
+ default:
+ report_fatal_error("Unknown FPU: " + Twine(FPU));
+ break;
+ }
+}
+size_t ARMTargetELFStreamer::calculateContentSize() const {
+ size_t Result = 0;
+ for (size_t i = 0; i < Contents.size(); ++i) {
+ AttributeItem item = Contents[i];
+ switch (item.Type) {
+ case AttributeItem::HiddenAttribute:
+ break;
+ case AttributeItem::NumericAttribute:
+ Result += getULEB128Size(item.Tag);
+ Result += getULEB128Size(item.IntValue);
+ break;
+ case AttributeItem::TextAttribute:
+ Result += getULEB128Size(item.Tag);
+ Result += item.StringValue.size() + 1; // string + '\0'
+ break;
+ case AttributeItem::NumericAndTextAttributes:
+ Result += getULEB128Size(item.Tag);
+ Result += getULEB128Size(item.IntValue);
+ Result += item.StringValue.size() + 1; // string + '\0';
+ break;
+ }
+ }
+ return Result;
+}
+void ARMTargetELFStreamer::finishAttributeSection() {
+ // <format-version>
+ // [ <section-length> "vendor-name"
+ // [ <file-tag> <size> <attribute>*
+ // | <section-tag> <size> <section-number>* 0 <attribute>*
+ // | <symbol-tag> <size> <symbol-number>* 0 <attribute>*
+ // ]+
+ // ]*
+
+ if (FPU != ARM::FK_INVALID)
+ emitFPUDefaultAttributes();
+
+ if (Arch != ARM::AK_INVALID)
+ emitArchDefaultAttributes();
+
+ if (Contents.empty())
+ return;
+
+ std::sort(Contents.begin(), Contents.end(), AttributeItem::LessTag);
+
+ ARMELFStreamer &Streamer = getStreamer();
+
+ // Switch to .ARM.attributes section
+ if (AttributeSection) {
+ Streamer.SwitchSection(AttributeSection);
+ } else {
+ AttributeSection = Streamer.getContext().getELFSection(
+ ".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0);
+ Streamer.SwitchSection(AttributeSection);
+
+ // Format version
+ Streamer.EmitIntValue(0x41, 1);
+ }
+
+ // Vendor size + Vendor name + '\0'
+ const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
+
+ // Tag + Tag Size
+ const size_t TagHeaderSize = 1 + 4;
+
+ const size_t ContentsSize = calculateContentSize();
+
+ Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
+ Streamer.EmitBytes(CurrentVendor);
+ Streamer.EmitIntValue(0, 1); // '\0'
+
+ Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
+ Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
+
+ // Size should have been accounted for already, now
+ // emit each field as its type (ULEB or String)
+ for (size_t i = 0; i < Contents.size(); ++i) {
+ AttributeItem item = Contents[i];
+ Streamer.EmitULEB128IntValue(item.Tag);
+ switch (item.Type) {
+ default: llvm_unreachable("Invalid attribute type");
+ case AttributeItem::NumericAttribute:
+ Streamer.EmitULEB128IntValue(item.IntValue);
+ break;
+ case AttributeItem::TextAttribute:
+ Streamer.EmitBytes(item.StringValue);
+ Streamer.EmitIntValue(0, 1); // '\0'
+ break;
+ case AttributeItem::NumericAndTextAttributes:
+ Streamer.EmitULEB128IntValue(item.IntValue);
+ Streamer.EmitBytes(item.StringValue);
+ Streamer.EmitIntValue(0, 1); // '\0'
+ break;
+ }
+ }
+
+ Contents.clear();
+ FPU = ARM::FK_INVALID;
+}
+
+void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
+ ARMELFStreamer &Streamer = getStreamer();
+ if (!Streamer.IsThumb)
+ return;
+
+ Streamer.getAssembler().registerSymbol(*Symbol);
+ unsigned Type = cast<MCSymbolELF>(Symbol)->getType();
+ if (Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)
+ Streamer.EmitThumbFunc(Symbol);
+}
+
+void
+ARMTargetELFStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
+ getStreamer().EmitFixup(S, FK_Data_4);
+}
+
+void ARMTargetELFStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Value)) {
+ const MCSymbol &Sym = SRE->getSymbol();
+ if (!Sym.isDefined()) {
+ getStreamer().EmitAssignment(Symbol, Value);
+ return;
+ }
+ }
+
+ getStreamer().EmitThumbFunc(Symbol);
+ getStreamer().EmitAssignment(Symbol, Value);
+}
+
+void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) {
+ getStreamer().emitInst(Inst, Suffix);
+}
+
+void ARMELFStreamer::FinishImpl() {
+ MCTargetStreamer &TS = *getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+ ATS.finishAttributeSection();
+
+ MCELFStreamer::FinishImpl();
+}
+
+inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
+ unsigned Type,
+ unsigned Flags,
+ SectionKind Kind,
+ const MCSymbol &Fn) {
+ const MCSectionELF &FnSection =
+ static_cast<const MCSectionELF &>(Fn.getSection());
+
+ // Create the name for new section
+ StringRef FnSecName(FnSection.getSectionName());
+ SmallString<128> EHSecName(Prefix);
+ if (FnSecName != ".text") {
+ EHSecName += FnSecName;
+ }
+
+ // Get .ARM.extab or .ARM.exidx section
+ const MCSymbolELF *Group = FnSection.getGroup();
+ if (Group)
+ Flags |= ELF::SHF_GROUP;
+ MCSectionELF *EHSection =
+ getContext().getELFSection(EHSecName, Type, Flags, 0, Group,
+ FnSection.getUniqueID(), nullptr, &FnSection);
+
+ assert(EHSection && "Failed to get the required EH section");
+
+ // Switch to .ARM.extab or .ARM.exidx section
+ SwitchSection(EHSection);
+ EmitCodeAlignment(4);
+}
+
+inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) {
+ SwitchToEHSection(".ARM.extab", ELF::SHT_PROGBITS, ELF::SHF_ALLOC,
+ SectionKind::getData(), FnStart);
+}
+
+inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
+ SwitchToEHSection(".ARM.exidx", ELF::SHT_ARM_EXIDX,
+ ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER,
+ SectionKind::getData(), FnStart);
+}
+void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
+ MCDataFragment *Frag = getOrCreateDataFragment();
+ Frag->getFixups().push_back(MCFixup::create(Frag->getContents().size(), Expr,
+ Kind));
+}
+
+void ARMELFStreamer::Reset() {
+ ExTab = nullptr;
+ FnStart = nullptr;
+ Personality = nullptr;
+ PersonalityIndex = ARM::EHABI::NUM_PERSONALITY_INDEX;
+ FPReg = ARM::SP;
+ FPOffset = 0;
+ SPOffset = 0;
+ PendingOffset = 0;
+ UsedFP = false;
+ CantUnwind = false;
+
+ Opcodes.clear();
+ UnwindOpAsm.Reset();
+}
+
+void ARMELFStreamer::emitFnStart() {
+ assert(FnStart == nullptr);
+ FnStart = getContext().createTempSymbol();
+ EmitLabel(FnStart);
+}
+
+void ARMELFStreamer::emitFnEnd() {
+ assert(FnStart && ".fnstart must precedes .fnend");
+
+ // Emit unwind opcodes if there is no .handlerdata directive
+ if (!ExTab && !CantUnwind)
+ FlushUnwindOpcodes(true);
+
+ // Emit the exception index table entry
+ SwitchToExIdxSection(*FnStart);
+
+ if (PersonalityIndex < ARM::EHABI::NUM_PERSONALITY_INDEX)
+ EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex));
+
+ const MCSymbolRefExpr *FnStartRef =
+ MCSymbolRefExpr::create(FnStart,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+
+ EmitValue(FnStartRef, 4);
+
+ if (CantUnwind) {
+ EmitIntValue(ARM::EHABI::EXIDX_CANTUNWIND, 4);
+ } else if (ExTab) {
+ // Emit a reference to the unwind opcodes in the ".ARM.extab" section.
+ const MCSymbolRefExpr *ExTabEntryRef =
+ MCSymbolRefExpr::create(ExTab,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+ EmitValue(ExTabEntryRef, 4);
+ } else {
+ // For the __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in
+ // the second word of exception index table entry. The size of the unwind
+ // opcodes should always be 4 bytes.
+ assert(PersonalityIndex == ARM::EHABI::AEABI_UNWIND_CPP_PR0 &&
+ "Compact model must use __aeabi_unwind_cpp_pr0 as personality");
+ assert(Opcodes.size() == 4u &&
+ "Unwind opcode size for __aeabi_unwind_cpp_pr0 must be equal to 4");
+ uint64_t Intval = Opcodes[0] |
+ Opcodes[1] << 8 |
+ Opcodes[2] << 16 |
+ Opcodes[3] << 24;
+ EmitIntValue(Intval, Opcodes.size());
+ }
+
+ // Switch to the section containing FnStart
+ SwitchSection(&FnStart->getSection());
+
+ // Clean exception handling frame information
+ Reset();
+}
+
+void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; }
+
+// Add the R_ARM_NONE fixup at the same position
+void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
+ const MCSymbol *PersonalitySym = getContext().getOrCreateSymbol(Name);
+
+ const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::create(
+ PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext());
+
+ visitUsedExpr(*PersonalityRef);
+ MCDataFragment *DF = getOrCreateDataFragment();
+ DF->getFixups().push_back(MCFixup::create(DF->getContents().size(),
+ PersonalityRef,
+ MCFixup::getKindForSize(4, false)));
+}
+
+void ARMELFStreamer::FlushPendingOffset() {
+ if (PendingOffset != 0) {
+ UnwindOpAsm.EmitSPOffset(-PendingOffset);
+ PendingOffset = 0;
+ }
+}
+
+void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
+ // Emit the unwind opcode to restore $sp.
+ if (UsedFP) {
+ const MCRegisterInfo *MRI = getContext().getRegisterInfo();
+ int64_t LastRegSaveSPOffset = SPOffset - PendingOffset;
+ UnwindOpAsm.EmitSPOffset(LastRegSaveSPOffset - FPOffset);
+ UnwindOpAsm.EmitSetSP(MRI->getEncodingValue(FPReg));
+ } else {
+ FlushPendingOffset();
+ }
+
+ // Finalize the unwind opcode sequence
+ UnwindOpAsm.Finalize(PersonalityIndex, Opcodes);
+
+ // For compact model 0, we have to emit the unwind opcodes in the .ARM.exidx
+ // section. Thus, we don't have to create an entry in the .ARM.extab
+ // section.
+ if (NoHandlerData && PersonalityIndex == ARM::EHABI::AEABI_UNWIND_CPP_PR0)
+ return;
+
+ // Switch to .ARM.extab section.
+ SwitchToExTabSection(*FnStart);
+
+ // Create .ARM.extab label for offset in .ARM.exidx
+ assert(!ExTab);
+ ExTab = getContext().createTempSymbol();
+ EmitLabel(ExTab);
+
+ // Emit personality
+ if (Personality) {
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::create(Personality,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+
+ EmitValue(PersonalityRef, 4);
+ }
+
+ // Emit unwind opcodes
+ assert((Opcodes.size() % 4) == 0 &&
+ "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be multiple of 4");
+ for (unsigned I = 0; I != Opcodes.size(); I += 4) {
+ uint64_t Intval = Opcodes[I] |
+ Opcodes[I + 1] << 8 |
+ Opcodes[I + 2] << 16 |
+ Opcodes[I + 3] << 24;
+ EmitIntValue(Intval, 4);
+ }
+
+ // According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or
+ // __aeabi_unwind_cpp_pr2() is used, then the handler data must be emitted
+ // after the unwind opcodes. The handler data consists of several 32-bit
+ // words, and should be terminated by zero.
+ //
+ // In case that the .handlerdata directive is not specified by the
+ // programmer, we should emit zero to terminate the handler data.
+ if (NoHandlerData && !Personality)
+ EmitIntValue(0, 4);
+}
+
+void ARMELFStreamer::emitHandlerData() { FlushUnwindOpcodes(false); }
+
+void ARMELFStreamer::emitPersonality(const MCSymbol *Per) {
+ Personality = Per;
+ UnwindOpAsm.setPersonality(Per);
+}
+
+void ARMELFStreamer::emitPersonalityIndex(unsigned Index) {
+ assert(Index < ARM::EHABI::NUM_PERSONALITY_INDEX && "invalid index");
+ PersonalityIndex = Index;
+}
+
+void ARMELFStreamer::emitSetFP(unsigned NewFPReg, unsigned NewSPReg,
+ int64_t Offset) {
+ assert((NewSPReg == ARM::SP || NewSPReg == FPReg) &&
+ "the operand of .setfp directive should be either $sp or $fp");
+
+ UsedFP = true;
+ FPReg = NewFPReg;
+
+ if (NewSPReg == ARM::SP)
+ FPOffset = SPOffset + Offset;
+ else
+ FPOffset += Offset;
+}
+
+void ARMELFStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
+ assert((Reg != ARM::SP && Reg != ARM::PC) &&
+ "the operand of .movsp cannot be either sp or pc");
+ assert(FPReg == ARM::SP && "current FP must be SP");
+
+ FlushPendingOffset();
+
+ FPReg = Reg;
+ FPOffset = SPOffset + Offset;
+
+ const MCRegisterInfo *MRI = getContext().getRegisterInfo();
+ UnwindOpAsm.EmitSetSP(MRI->getEncodingValue(FPReg));
+}
+
+void ARMELFStreamer::emitPad(int64_t Offset) {
+ // Track the change of the $sp offset
+ SPOffset -= Offset;
+
+ // To squash multiple .pad directives, we should delay the unwind opcode
+ // until the .save, .vsave, .handlerdata, or .fnend directives.
+ PendingOffset -= Offset;
+}
+
+void ARMELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool IsVector) {
+ // Collect the registers in the register list
+ unsigned Count = 0;
+ uint32_t Mask = 0;
+ const MCRegisterInfo *MRI = getContext().getRegisterInfo();
+ for (size_t i = 0; i < RegList.size(); ++i) {
+ unsigned Reg = MRI->getEncodingValue(RegList[i]);
+ assert(Reg < (IsVector ? 32U : 16U) && "Register out of range");
+ unsigned Bit = (1u << Reg);
+ if ((Mask & Bit) == 0) {
+ Mask |= Bit;
+ ++Count;
+ }
+ }
+
+ // Track the change the $sp offset: For the .save directive, the
+ // corresponding push instruction will decrease the $sp by (4 * Count).
+ // For the .vsave directive, the corresponding vpush instruction will
+ // decrease $sp by (8 * Count).
+ SPOffset -= Count * (IsVector ? 8 : 4);
+
+ // Emit the opcode
+ FlushPendingOffset();
+ if (IsVector)
+ UnwindOpAsm.EmitVFPRegSave(Mask);
+ else
+ UnwindOpAsm.EmitRegSave(Mask);
+}
+
+void ARMELFStreamer::emitUnwindRaw(int64_t Offset,
+ const SmallVectorImpl<uint8_t> &Opcodes) {
+ FlushPendingOffset();
+ SPOffset = SPOffset - Offset;
+ UnwindOpAsm.EmitRaw(Opcodes);
+}
+
+namespace llvm {
+
+MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new ARMTargetAsmStreamer(S, OS, *InstPrint, isVerboseAsm);
+}
+
+MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) {
+ return new ARMTargetStreamer(S);
+}
+
+MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return new ARMTargetELFStreamer(S);
+ return new ARMTargetStreamer(S);
+}
+
+MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IsThumb) {
+ ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
+ // FIXME: This should eventually end up somewhere else where more
+ // intelligent flag decisions can be made. For now we are just maintaining
+ // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+ S->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+ }
+
+}
+
+
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
new file mode 100644
index 0000000..46ba571
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -0,0 +1,110 @@
+//===-- ARMFixupKinds.h - ARM Specific Fixup Entries ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMFIXUPKINDS_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace ARM {
+enum Fixups {
+ // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol
+ // addresses
+ fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind,
+
+ // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with
+ // the 16-bit halfwords reordered.
+ fixup_t2_ldst_pcrel_12,
+
+ // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol
+ // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded.
+ fixup_arm_pcrel_10_unscaled,
+ // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
+ // used in VFP instructions where the lower 2 bits are not encoded
+ // (so it's encoded as an 8-bit immediate).
+ fixup_arm_pcrel_10,
+ // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for
+ // the short-swapped encoding of Thumb2 instructions.
+ fixup_t2_pcrel_10,
+ // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol
+ // addresses where the lower 2 bits are not encoded (so it's encoded as an
+ // 8-bit immediate).
+ fixup_thumb_adr_pcrel_10,
+ // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_arm_adr_pcrel_12,
+ // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_t2_adr_pcrel_12,
+ // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch
+ // instructions.
+ fixup_arm_condbranch,
+ // fixup_arm_uncondbranch - 24-bit PC relative relocation for
+ // branch instructions. (unconditional)
+ fixup_arm_uncondbranch,
+ // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct
+ // uconditional branch instructions.
+ fixup_t2_condbranch,
+ // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct
+ // branch unconditional branch instructions.
+ fixup_t2_uncondbranch,
+
+ // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
+ fixup_arm_thumb_br,
+
+ // The following fixups handle the ARM BL instructions. These can be
+ // conditionalised; however, the ARM ELF ABI requires a different relocation
+ // in that case: R_ARM_JUMP24 instead of R_ARM_CALL. The difference is that
+ // R_ARM_CALL is allowed to change the instruction to a BLX inline, which has
+ // no conditional version; R_ARM_JUMP24 would have to insert a veneer.
+ //
+ // MachO does not draw a distinction between the two cases, so it will treat
+ // fixup_arm_uncondbl and fixup_arm_condbl as identical fixups.
+
+ // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions.
+ fixup_arm_uncondbl,
+
+ // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial
+ // conditionalisation.
+ fixup_arm_condbl,
+
+ // fixup_arm_blx - Fixup for ARM BLX instructions.
+ fixup_arm_blx,
+
+ // fixup_arm_thumb_bl - Fixup for Thumb BL instructions.
+ fixup_arm_thumb_bl,
+
+ // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions.
+ fixup_arm_thumb_blx,
+
+ // fixup_arm_thumb_cb - Fixup for Thumb branch instructions.
+ fixup_arm_thumb_cb,
+
+ // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs.
+ fixup_arm_thumb_cp,
+
+ // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions.
+ fixup_arm_thumb_bcc,
+
+ // The next two are for the movt/movw pair
+ // the 16bit imm field are split into imm{15-12} and imm{11-0}
+ fixup_arm_movt_hi16, // :upper16:
+ fixup_arm_movw_lo16, // :lower16:
+ fixup_t2_movt_hi16, // :upper16:
+ fixup_t2_movw_lo16, // :lower16:
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
new file mode 100644
index 0000000..bda37f6
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -0,0 +1,115 @@
+//===-- ARMMCAsmInfo.cpp - ARM asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the ARMMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void ARMMCAsmInfoDarwin::anchor() { }
+
+ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) {
+ if ((TheTriple.getArch() == Triple::armeb) ||
+ (TheTriple.getArch() == Triple::thumbeb))
+ IsLittleEndian = false;
+
+ Data64bitsDirective = nullptr;
+ CommentString = "@";
+ Code16Directive = ".code\t16";
+ Code32Directive = ".code\t32";
+ UseDataRegionDirectives = true;
+
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = TheTriple.isOSDarwin() && !TheTriple.isWatchOS()
+ ? ExceptionHandling::SjLj
+ : ExceptionHandling::DwarfCFI;
+
+ UseIntegratedAssembler = true;
+}
+
+void ARMELFMCAsmInfo::anchor() { }
+
+ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) {
+ if ((TheTriple.getArch() == Triple::armeb) ||
+ (TheTriple.getArch() == Triple::thumbeb))
+ IsLittleEndian = false;
+
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ Data64bitsDirective = nullptr;
+ CommentString = "@";
+ Code16Directive = ".code\t16";
+ Code32Directive = ".code\t32";
+
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ switch (TheTriple.getOS()) {
+ case Triple::Bitrig:
+ case Triple::NetBSD:
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+ break;
+ default:
+ ExceptionsType = ExceptionHandling::ARM;
+ break;
+ }
+
+ // foo(plt) instead of foo@plt
+ UseParensForSymbolVariant = true;
+
+ UseIntegratedAssembler = true;
+}
+
+void ARMELFMCAsmInfo::setUseIntegratedAssembler(bool Value) {
+ UseIntegratedAssembler = Value;
+ if (!UseIntegratedAssembler) {
+ // gas doesn't handle VFP register names in cfi directives,
+ // so don't use register names with external assembler.
+ // See https://sourceware.org/bugzilla/show_bug.cgi?id=16694
+ DwarfRegNumForCFI = true;
+ }
+}
+
+void ARMCOFFMCAsmInfoMicrosoft::anchor() { }
+
+ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
+ AlignmentIsInBytes = false;
+
+ PrivateGlobalPrefix = "$M";
+ PrivateLabelPrefix = "$M";
+}
+
+void ARMCOFFMCAsmInfoGNU::anchor() { }
+
+ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
+ AlignmentIsInBytes = false;
+ HasSingleParameterDotFile = true;
+
+ CommentString = "@";
+ Code16Directive = ".code\t16";
+ Code32Directive = ".code\t32";
+ PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = ".L";
+
+ SupportsDebugInformation = true;
+ ExceptionsType = ExceptionHandling::None;
+ UseParensForSymbolVariant = true;
+
+ UseIntegratedAssembler = false;
+ DwarfRegNumForCFI = true;
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
new file mode 100644
index 0000000..5e54816
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -0,0 +1,56 @@
+//===-- ARMMCAsmInfo.h - ARM asm properties --------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCASMINFO_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
+
+namespace llvm {
+class Triple;
+
+class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+
+public:
+ explicit ARMMCAsmInfoDarwin(const Triple &TheTriple);
+};
+
+class ARMELFMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit ARMELFMCAsmInfo(const Triple &TT);
+
+ void setUseIntegratedAssembler(bool Value) override;
+};
+
+class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
+ void anchor() override;
+
+public:
+ explicit ARMCOFFMCAsmInfoMicrosoft();
+};
+
+class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF {
+ void anchor() override;
+
+public:
+ explicit ARMCOFFMCAsmInfoGNU();
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
new file mode 100644
index 0000000..b885783
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -0,0 +1,1695 @@
+//===-- ARM/ARMMCCodeEmitter.cpp - Convert ARM code to machine code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
+STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
+
+namespace {
+class ARMMCCodeEmitter : public MCCodeEmitter {
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete;
+ void operator=(const ARMMCCodeEmitter &) = delete;
+ const MCInstrInfo &MCII;
+ const MCContext &CTX;
+ bool IsLittleEndian;
+
+public:
+ ARMMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool IsLittle)
+ : MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) {
+ }
+
+ ~ARMMCCodeEmitter() override {}
+
+ bool isThumb(const MCSubtargetInfo &STI) const {
+ return STI.getFeatureBits()[ARM::ModeThumb];
+ }
+ bool isThumb2(const MCSubtargetInfo &STI) const {
+ return isThumb(STI) && STI.getFeatureBits()[ARM::FeatureThumb2];
+ }
+ bool isTargetMachO(const MCSubtargetInfo &STI) const {
+ const Triple &TT = STI.getTargetTriple();
+ return TT.isOSBinFormatMachO();
+ }
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getHiLo16ImmOpValue - Return the encoding for the hi / low 16-bit of
+ /// the specified operand. This is used for operands with :lower16: and
+ /// :upper16: prefixes.
+ uint32_t getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ bool EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx,
+ unsigned &Reg, unsigned &Imm,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getThumbBLTargetOpValue - Return encoding info for Thumb immediate
+ /// BL branch target.
+ uint32_t getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+ /// BLX branch target.
+ uint32_t getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+ /// immediate Thumb2 direct branch target.
+ uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint32_t getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getAdrLabelOpValue - Return encoding info for 12-bit immediate
+ /// ADR label target.
+ uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint32_t getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint32_t getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+
+ /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getThumbAddrModeRegRegOpValue - Return encoding for 'reg + reg' operand.
+ uint32_t getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getT2AddrModeImm8s4OpValue - Return encoding info for 'reg +/- imm8<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getT2AddrModeImm0_1020s4OpValue - Return encoding info for 'reg + imm8<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getT2Imm8s4OpValue - Return encoding info for '+/- imm8<<2'
+ /// operand.
+ uint32_t getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+
+ /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
+ /// operand as needed by load/store instructions.
+ uint32_t getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getLdStmModeOpValue - Return encoding for load/store multiple mode.
+ uint32_t getLdStmModeOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm();
+ switch (Mode) {
+ default: llvm_unreachable("Unknown addressing sub-mode!");
+ case ARM_AM::da: return 0;
+ case ARM_AM::ia: return 1;
+ case ARM_AM::db: return 2;
+ case ARM_AM::ib: return 3;
+ }
+ }
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
+ switch (ShOpc) {
+ case ARM_AM::no_shift:
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::asr: return 2;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ llvm_unreachable("Invalid ShiftOpc!");
+ }
+
+ /// getAddrMode2OpValue - Return encoding for addrmode2 operands.
+ uint32_t getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getAddrMode2OffsetOpValue - Return encoding for am2offset operands.
+ uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getPostIdxRegOpValue - Return encoding for postidx_reg operands.
+ uint32_t getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getAddrMode3OffsetOpValue - Return encoding for am3offset operands.
+ uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getAddrMode3OpValue - Return encoding for addrmode3 operands.
+ uint32_t getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getAddrModeThumbSPOpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+ uint32_t getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+ uint32_t getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand.
+ uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getCCOutOpValue - Return encoding of the 's' bit.
+ unsigned getCCOutOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // The operand is either reg0 or CPSR. The 's' bit is encoded as '0' or
+ // '1' respectively.
+ return MI.getOperand(Op).getReg() == ARM::CPSR;
+ }
+
+ /// getSOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+ unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(Op);
+
+ // We expect MO to be an immediate or an expression,
+ // if it is an immediate - that's fine, just encode the value.
+ // Otherwise - create a Fixup.
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ // In instruction code this value always encoded as lowest 12 bits,
+ // so we don't have to perform any specific adjustments.
+ // Due to requirements of relocatable records we have to use FK_Data_4.
+ // See ARMELFObjectWriter::ExplicitRelSym and
+ // ARMELFObjectWriter::GetRelocTypeInner for more details.
+ MCFixupKind Kind = MCFixupKind(FK_Data_4);
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+ return 0;
+ }
+
+ unsigned SoImm = MO.getImm();
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+ return Binary;
+ }
+
+ unsigned getModImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &ST) const {
+ const MCOperand &MO = MI.getOperand(Op);
+
+ // Support for fixups (MCFixup)
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ // In instruction code this value always encoded as lowest 12 bits,
+ // so we don't have to perform any specific adjustments.
+ // Due to requirements of relocatable records we have to use FK_Data_4.
+ // See ARMELFObjectWriter::ExplicitRelSym and
+ // ARMELFObjectWriter::GetRelocTypeInner for more details.
+ MCFixupKind Kind = MCFixupKind(FK_Data_4);
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+ return 0;
+ }
+
+ // Immediate is already in its encoded format
+ return MO.getImm();
+ }
+
+ /// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+ unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm);
+ assert(Encoded != ~0U && "Not a Thumb2 so_imm value?");
+ return Encoded;
+ }
+
+ unsigned getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getSORegOpValue - Return an encoded so_reg shifted register value.
+ unsigned getSORegRegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSORegImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 64 - MI.getOperand(Op).getImm();
+ }
+
+ unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getShiftRight8Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getShiftRight16Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getShiftRight32Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getThumbSRImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const;
+ unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const;
+ unsigned NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const;
+ unsigned NEONThumb2V8PostEncoder(const MCInst &MI,
+ unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned VFPThumb2PostEncoder(const MCInst &MI,
+ unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const;
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
+ EmitByte((Val >> Shift) & 0xff, OS);
+ }
+ }
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(MCII, Ctx, true);
+}
+
+MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(MCII, Ctx, false);
+}
+
+/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const {
+ if (isThumb2(STI)) {
+ // NEON Thumb2 data-processsing encodings are very simple: bit 24 is moved
+ // to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are
+ // set to 1111.
+ unsigned Bit24 = EncodedValue & 0x01000000;
+ unsigned Bit28 = Bit24 << 4;
+ EncodedValue &= 0xEFFFFFFF;
+ EncodedValue |= Bit28;
+ EncodedValue |= 0x0F000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2LoadStorePostEncoder - Post-process encoded NEON load/store
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const {
+ if (isThumb2(STI)) {
+ EncodedValue &= 0xF0FFFFFF;
+ EncodedValue |= 0x09000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2DupPostEncoder - Post-process encoded NEON vdup
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const {
+ if (isThumb2(STI)) {
+ EncodedValue &= 0x00FFFFFF;
+ EncodedValue |= 0xEE000000;
+ }
+
+ return EncodedValue;
+}
+
+/// Post-process encoded NEON v8 instructions, and rewrite them to Thumb2 form
+/// if we are in Thumb2.
+unsigned ARMMCCodeEmitter::NEONThumb2V8PostEncoder(const MCInst &MI,
+ unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const {
+ if (isThumb2(STI)) {
+ EncodedValue |= 0xC000000; // Set bits 27-26
+ }
+
+ return EncodedValue;
+}
+
+/// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite
+/// them to their Thumb2 form if we are currently in Thumb2 mode.
+unsigned ARMMCCodeEmitter::
+VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const {
+ if (isThumb2(STI)) {
+ EncodedValue &= 0x0FFFFFFF;
+ EncodedValue |= 0xE0000000;
+ }
+ return EncodedValue;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg);
+
+ // Q registers are encoded as 2x their register number.
+ switch (Reg) {
+ default:
+ return RegNo;
+ case ARM::Q0: case ARM::Q1: case ARM::Q2: case ARM::Q3:
+ case ARM::Q4: case ARM::Q5: case ARM::Q6: case ARM::Q7:
+ case ARM::Q8: case ARM::Q9: case ARM::Q10: case ARM::Q11:
+ case ARM::Q12: case ARM::Q13: case ARM::Q14: case ARM::Q15:
+ return 2 * RegNo;
+ }
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ return static_cast<unsigned>(APFloat(MO.getFPImm())
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ }
+
+ llvm_unreachable("Unable to encode MCOperand!");
+}
+
+/// getAddrModeImmOpValue - Return encoding info for 'reg +/- imm' operand.
+bool ARMMCCodeEmitter::
+EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
+ unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+
+ Reg = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+
+ int32_t SImm = MO1.getImm();
+ bool isAdd = true;
+
+ // Special value for #-0
+ if (SImm == INT32_MIN) {
+ SImm = 0;
+ isAdd = false;
+ }
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (SImm < 0) {
+ SImm = -SImm;
+ isAdd = false;
+ }
+
+ Imm = SImm;
+ return isAdd;
+}
+
+/// getBranchTargetOpValue - Helper function to get the branch target operand,
+/// which is either an immediate or requires a fixup.
+static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm()) return MO.getImm();
+ assert(MO.isExpr() && "Unexpected branch target type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+
+ // All of the information is in the fixup.
+ return 0;
+}
+
+// Thumb BL and BLX use a strange offset encoding where bits 22 and 21 are
+// determined by negating them and XOR'ing them with bit 23.
+static int32_t encodeThumbBLOffset(int32_t offset) {
+ offset >>= 1;
+ uint32_t S = (offset & 0x800000) >> 23;
+ uint32_t J1 = (offset & 0x400000) >> 22;
+ uint32_t J2 = (offset & 0x200000) >> 21;
+ J1 = (~J1 & 0x1);
+ J2 = (~J2 & 0x1);
+ J1 ^= S;
+ J2 ^= S;
+
+ offset &= ~0x600000;
+ offset |= J1 << 22;
+ offset |= J2 << 21;
+
+ return offset;
+}
+
+/// getThumbBLTargetOpValue - Return encoding info for immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl,
+ Fixups, STI);
+ return encodeThumbBLOffset(MO.getImm());
+}
+
+/// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+/// BLX branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx,
+ Fixups, STI);
+ return encodeThumbBLOffset(MO.getImm());
+}
+
+/// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br,
+ Fixups, STI);
+ return (MO.getImm() >> 1);
+}
+
+/// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc,
+ Fixups, STI);
+ return (MO.getImm() >> 1);
+}
+
+/// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups, STI);
+ return (MO.getImm() >> 1);
+}
+
+/// Return true if this branch has a non-always predication
+static bool HasConditionalBranch(const MCInst &MI) {
+ int NumOp = MI.getNumOperands();
+ if (NumOp >= 2) {
+ for (int i = 0; i < NumOp-1; ++i) {
+ const MCOperand &MCOp1 = MI.getOperand(i);
+ const MCOperand &MCOp2 = MI.getOperand(i + 1);
+ if (MCOp1.isImm() && MCOp2.isReg() &&
+ (MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) {
+ if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // FIXME: This really, really shouldn't use TargetMachine. We don't want
+ // coupling between MC and TM anywhere we can help it.
+ if (isThumb2(STI))
+ return
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups, STI);
+ return getARMBranchTargetOpValue(MI, OpIdx, Fixups, STI);
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr()) {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbranch, Fixups, STI);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_uncondbranch, Fixups, STI);
+ }
+
+ return MO.getImm() >> 2;
+}
+
+uint32_t ARMMCCodeEmitter::
+getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr()) {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbl, Fixups, STI);
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups, STI);
+ }
+
+ return MO.getImm() >> 2;
+}
+
+uint32_t ARMMCCodeEmitter::
+getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_blx, Fixups, STI);
+
+ return MO.getImm() >> 1;
+}
+
+/// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+/// immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ unsigned Val = 0;
+ const MCOperand MO = MI.getOperand(OpIdx);
+
+ if(MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups, STI);
+ else
+ Val = MO.getImm() >> 1;
+
+ bool I = (Val & 0x800000);
+ bool J1 = (Val & 0x400000);
+ bool J2 = (Val & 0x200000);
+ if (I ^ J1)
+ Val &= ~0x400000;
+ else
+ Val |= 0x400000;
+
+ if (I ^ J2)
+ Val &= ~0x200000;
+ else
+ Val |= 0x200000;
+
+ return Val;
+}
+
+/// getAdrLabelOpValue - Return encoding info for 12-bit shifted-immediate
+/// ADR label target.
+uint32_t ARMMCCodeEmitter::
+getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
+ Fixups, STI);
+ int64_t offset = MO.getImm();
+ uint32_t Val = 0x2000;
+
+ int SoImmVal;
+ if (offset == INT32_MIN) {
+ Val = 0x1000;
+ SoImmVal = 0;
+ } else if (offset < 0) {
+ Val = 0x1000;
+ offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ if(SoImmVal == -1) {
+ Val = 0x2000;
+ offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ }
+ } else {
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ if(SoImmVal == -1) {
+ Val = 0x1000;
+ offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ }
+ }
+
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ Val |= SoImmVal;
+ return Val;
+}
+
+/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
+ Fixups, STI);
+ int32_t Val = MO.getImm();
+ if (Val == INT32_MIN)
+ Val = 0x1000;
+ else if (Val < 0) {
+ Val *= -1;
+ Val |= 0x1000;
+ }
+ return Val;
+}
+
+/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
+ Fixups, STI);
+ return MO.getImm();
+}
+
+/// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg'
+/// operand.
+uint32_t ARMMCCodeEmitter::
+getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &,
+ const MCSubtargetInfo &STI) const {
+ // [Rn, Rm]
+ // {5-3} = Rm
+ // {2-0} = Rn
+ const MCOperand &MO1 = MI.getOperand(OpIdx);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg());
+ unsigned Rm = CTX.getRegisterInfo()->getEncodingValue(MO2.getReg());
+ return (Rm << 3) | Rn;
+}
+
+/// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ unsigned Reg, Imm12;
+ bool isAdd = true;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC.
+ Imm12 = 0;
+
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ isAdd = false ; // 'U' bit is set as part of the fixup.
+
+ MCFixupKind Kind;
+ if (isThumb2(STI))
+ Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+
+ ++MCNumCPRelocations;
+ } else {
+ Reg = ARM::PC;
+ int32_t Offset = MO.getImm();
+ if (Offset == INT32_MIN) {
+ Offset = 0;
+ isAdd = false;
+ } else if (Offset < 0) {
+ Offset *= -1;
+ isAdd = false;
+ }
+ Imm12 = Offset;
+ }
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups, STI);
+
+ uint32_t Binary = Imm12 & 0xfff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+}
+
+/// getT2Imm8s4OpValue - Return encoding info for
+/// '+/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // FIXME: The immediate operand should have already been encoded like this
+ // before ever getting here. The encoder method should just need to combine
+ // the MI operands for the register and the offset into a single
+ // representation for the complex operand in the .td file. This isn't just
+ // style, unfortunately. As-is, we can't represent the distinct encoding
+ // for #-0.
+
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ int32_t Imm8 = MI.getOperand(OpIdx).getImm();
+ bool isAdd = Imm8 >= 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (Imm8 < 0)
+ Imm8 = -(uint32_t)Imm8;
+
+ // Scaled by 4.
+ Imm8 /= 4;
+
+ uint32_t Binary = Imm8 & 0xff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ return Binary;
+}
+
+/// getT2AddrModeImm8s4OpValue - Return encoding info for
+/// 'reg +/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd = true;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false ; // 'U' bit is set as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+
+ ++MCNumCPRelocations;
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups, STI);
+
+ // FIXME: The immediate operand should have already been encoded like this
+ // before ever getting here. The encoder method should just need to combine
+ // the MI operands for the register and the offset into a single
+ // representation for the complex operand in the .td file. This isn't just
+ // style, unfortunately. As-is, we can't represent the distinct encoding
+ // for #-0.
+ uint32_t Binary = (Imm8 >> 2) & 0xff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+/// getT2AddrModeImm0_1020s4OpValue - Return encoding info for
+/// 'reg + imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {11-8} = reg
+ // {7-0} = imm8
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ unsigned Reg = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+ unsigned Imm8 = MO1.getImm();
+ return (Reg << 8) | Imm8;
+}
+
+uint32_t
+ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {20-16} = imm{15-12}
+ // {11-0} = imm{11-0}
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ // Hi / lo 16 bits already extracted during earlier passes.
+ return static_cast<unsigned>(MO.getImm());
+
+ // Handle :upper16: and :lower16: assembly prefixes.
+ const MCExpr *E = MO.getExpr();
+ MCFixupKind Kind;
+ if (E->getKind() == MCExpr::Target) {
+ const ARMMCExpr *ARM16Expr = cast<ARMMCExpr>(E);
+ E = ARM16Expr->getSubExpr();
+
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(E)) {
+ const int64_t Value = MCE->getValue();
+ if (Value > UINT32_MAX)
+ report_fatal_error("constant value truncated (limited to 32-bit)");
+
+ switch (ARM16Expr->getKind()) {
+ case ARMMCExpr::VK_ARM_HI16:
+ return (int32_t(Value) & 0xffff0000) >> 16;
+ case ARMMCExpr::VK_ARM_LO16:
+ return (int32_t(Value) & 0x0000ffff);
+ default: llvm_unreachable("Unsupported ARMFixup");
+ }
+ }
+
+ switch (ARM16Expr->getKind()) {
+ default: llvm_unreachable("Unsupported ARMFixup");
+ case ARMMCExpr::VK_ARM_HI16:
+ Kind = MCFixupKind(isThumb2(STI) ? ARM::fixup_t2_movt_hi16
+ : ARM::fixup_arm_movt_hi16);
+ break;
+ case ARMMCExpr::VK_ARM_LO16:
+ Kind = MCFixupKind(isThumb2(STI) ? ARM::fixup_t2_movw_lo16
+ : ARM::fixup_arm_movw_lo16);
+ break;
+ }
+
+ Fixups.push_back(MCFixup::create(0, E, Kind, MI.getLoc()));
+ return 0;
+ }
+ // If the expression doesn't have :upper16: or :lower16: on it,
+ // it's just a plain immediate expression, previously those evaluated to
+ // the lower 16 bits of the expression regardless of whether
+ // we have a movt or a movw, but that led to misleadingly results.
+ // This is disallowed in the AsmParser in validateInstruction()
+ // so this should never happen.
+ llvm_unreachable("expression without :upper16: or :lower16:");
+}
+
+uint32_t ARMMCCodeEmitter::
+getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+ unsigned Rm = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg());
+ unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
+ bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
+ unsigned SBits = getShiftOp(ShOp);
+
+ // While "lsr #32" and "asr #32" exist, they are encoded with a 0 in the shift
+ // amount. However, it would be an easy mistake to make so check here.
+ assert((ShImm & ~0x1f) == 0 && "Out of range shift amount");
+
+ // {16-13} = Rn
+ // {12} = isAdd
+ // {11-0} = shifter
+ // {3-0} = Rm
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+ uint32_t Binary = Rm;
+ Binary |= Rn << 13;
+ Binary |= SBits << 5;
+ Binary |= ShImm << 7;
+ if (isAdd)
+ Binary |= 1 << 12;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {17-14} Rn
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+ uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups, STI);
+ Binary |= Rn << 14;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM2Op(Imm) == ARM_AM::add;
+ bool isReg = MO.getReg() != 0;
+ uint32_t Binary = ARM_AM::getAM2Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm12
+ if (isReg) {
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
+ Binary <<= 7; // Shift amount is bits [11:7]
+ Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
+ Binary |= CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Rm is bits [3:0]
+ }
+ return Binary | (isAdd << 12) | (isReg << 13);
+}
+
+uint32_t ARMMCCodeEmitter::
+getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {4} isAdd
+ // {3-0} Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ bool isAdd = MO1.getImm() != 0;
+ return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()) | (isAdd << 4);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {9} 1 == imm8, 0 == Rm
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+ return Imm8 | (isAdd << 8) | (isImm << 9);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+
+ // If The first operand isn't a register, we have a label reference.
+ if (!MO.isReg()) {
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10_unscaled);
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+
+ ++MCNumCPRelocations;
+ return (Rn << 9) | (1 << 13);
+ }
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+ unsigned Imm = MO2.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO1.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg());
+ return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
+}
+
+/// getAddrModeThumbSPOpValue - Encode the t_addrmode_sp operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // [SP, #imm]
+ // {7-0} = imm8
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ assert(MI.getOperand(OpIdx).getReg() == ARM::SP &&
+ "Unexpected base register!");
+
+ // The immediate is already shifted for the implicit zeroes, so no change
+ // here.
+ return MO1.getImm() & 0xff;
+}
+
+/// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // [Rn, #imm]
+ // {7-3} = imm5
+ // {2-0} = Rn
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+ unsigned Imm5 = MO1.getImm();
+ return ((Imm5 & 0x1f) << 3) | Rn;
+}
+
+/// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups, STI);
+ return (MO.getImm() >> 2);
+}
+
+/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false; // 'U' bit is handled as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind;
+ if (isThumb2(STI))
+ Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+
+ ++MCNumCPRelocations;
+ } else {
+ EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups, STI);
+ isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add;
+ }
+
+ uint32_t Binary = ARM_AM::getAM5Offset(Imm8);
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+unsigned ARMMCCodeEmitter::
+getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is Rs, the amount to shift by, and the third specifies
+ // the type of the shift.
+ //
+ // {3-0} = Rm.
+ // {4} = 1
+ // {6-5} = type
+ // {11-8} = Rs
+ // {7} = 0
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode Rm.
+ unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ }
+ }
+
+ Binary |= SBits << 4;
+
+ // Encode the shift operation Rs.
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (CTX.getRegisterInfo()->getEncodingValue(Rs) << ARMII::RegRsShift);
+}
+
+unsigned ARMMCCodeEmitter::
+getSORegImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+ // Encode Rm.
+ unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ // RRX - 110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ case ARM_AM::rrx:
+ Binary |= 0x60;
+ return Binary;
+ }
+
+ // Encode shift_imm bit[11:7].
+ Binary |= SBits << 4;
+ unsigned Offset = ARM_AM::getSORegOffset(MO1.getImm());
+ assert(Offset < 32 && "Offset must be in range 0-31!");
+ return Binary | (Offset << 7);
+}
+
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+ const MCOperand &MO3 = MI.getOperand(OpNum+2);
+
+ // Encoded as [Rn, Rm, imm].
+ // FIXME: Needs fixup support.
+ unsigned Value = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg());
+ Value <<= 4;
+ Value |= CTX.getRegisterInfo()->getEncodingValue(MO2.getReg());
+ Value <<= 2;
+ Value |= MO3.getImm();
+
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg());
+
+ // Even though the immediate is 8 bits long, we need 9 bits in order
+ // to represent the (inverse of the) sign bit.
+ Value <<= 9;
+ int32_t tmp = (int32_t)MO2.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 4096; // Set the ADD bit
+ Value |= tmp & 4095;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+ // Encode Rm.
+ unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::rrx: // FALLTHROUGH
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO1.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // 10 bits. lower 5 bits are are the lsb of the mask, high five bits are the
+ // msb of the mask.
+ const MCOperand &MO = MI.getOperand(Op);
+ uint32_t v = ~MO.getImm();
+ uint32_t lsb = countTrailingZeros(v);
+ uint32_t msb = (32 - countLeadingZeros (v)) - 1;
+ assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
+ return lsb | (msb << 5);
+}
+
+unsigned ARMMCCodeEmitter::
+getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // VLDM/VSTM:
+ // {12-8} = Vd
+ // {7-0} = Number of registers
+ //
+ // LDM/STM:
+ // {15-0} = Bitfield of GPRs.
+ unsigned Reg = MI.getOperand(Op).getReg();
+ bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
+ bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
+
+ unsigned Binary = 0;
+
+ if (SPRRegs || DPRRegs) {
+ // VLDM/VSTM
+ unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg);
+ unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
+ Binary |= (RegNo & 0x1f) << 8;
+ if (SPRRegs)
+ Binary |= NumRegs;
+ else
+ Binary |= NumRegs * 2;
+ } else {
+ for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
+ unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(MI.getOperand(I).getReg());
+ Binary |= 1 << RegNo;
+ }
+ }
+
+ return Binary;
+}
+
+/// getAddrMode6AddressOpValue - Encode an addrmode6 register number along
+/// with the alignment operand.
+unsigned ARMMCCodeEmitter::
+getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x02; break;
+ case 32: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+/// getAddrMode6OneLane32AddressOpValue - Encode an addrmode6 register number
+/// along with the alignment operand for use in VST1 and VLD1 with size 32.
+unsigned ARMMCCodeEmitter::
+getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 8:
+ case 16:
+ case 32: // Default '0' value for invalid alignments of 8, 16, 32 bytes.
+ case 2: Align = 0x00; break;
+ case 4: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+
+/// getAddrMode6DupAddressOpValue - Encode an addrmode6 register number and
+/// alignment operand for use in VLD-dup instructions. This is the same as
+/// getAddrMode6AddressOpValue except for the alignment encoding, which is
+/// different for VLD4-dup.
+unsigned ARMMCCodeEmitter::
+getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+unsigned ARMMCCodeEmitter::
+getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ if (MO.getReg() == 0) return 0x0D;
+ return CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight8Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 8 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight16Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 16 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight32Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 32 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight64Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 64 - MI.getOperand(Op).getImm();
+}
+
+void ARMMCCodeEmitter::
+encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Pseudo instructions don't get encoded.
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+ if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo)
+ return;
+
+ int Size;
+ if (Desc.getSize() == 2 || Desc.getSize() == 4)
+ Size = Desc.getSize();
+ else
+ llvm_unreachable("Unexpected instruction size!");
+
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
+ // Thumb 32-bit wide instructions need to emit the high order halfword
+ // first.
+ if (isThumb(STI) && Size == 4) {
+ EmitConstant(Binary >> 16, 2, OS);
+ EmitConstant(Binary & 0xffff, 2, OS);
+ } else
+ EmitConstant(Binary, Size, OS);
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
+#include "ARMGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
new file mode 100644
index 0000000..2063ca6
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -0,0 +1,41 @@
+//===-- ARMMCExpr.cpp - ARM specific MC expression classes ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCExpr.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "armmcexpr"
+
+const ARMMCExpr*
+ARMMCExpr::create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) ARMMCExpr(Kind, Expr);
+}
+
+void ARMMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_ARM_HI16: OS << ":upper16:"; break;
+ case VK_ARM_LO16: OS << ":lower16:"; break;
+ }
+
+ const MCExpr *Expr = getSubExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << '(';
+ Expr->print(OS, MAI);
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << ')';
+}
+
+void ARMMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
new file mode 100644
index 0000000..75dde80
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -0,0 +1,79 @@
+//===-- ARMMCExpr.h - ARM specific MC expression classes --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCEXPR_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class ARMMCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_ARM_None,
+ VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the .s file)
+ VK_ARM_LO16 // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the .s file)
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit ARMMCExpr(VariantKind Kind, const MCExpr *Expr)
+ : Kind(Kind), Expr(Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const ARMMCExpr *create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ static const ARMMCExpr *createUpper16(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_ARM_HI16, Expr, Ctx);
+ }
+
+ static const ARMMCExpr *createLower16(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_ARM_LO16, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getOpcode - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override {
+ return false;
+ }
+ void visitUsedExpr(MCStreamer &Streamer) const override;
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
+ }
+
+ // There are no TLS ARMMCExprs at the moment.
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
new file mode 100644
index 0000000..8c8c249
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -0,0 +1,344 @@
+//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMBaseInfo.h"
+#include "ARMMCAsmInfo.h"
+#include "ARMMCTargetDesc.h"
+#include "InstPrinter/ARMInstPrinter.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define GET_REGINFO_MC_DESC
+#include "ARMGenRegisterInfo.inc"
+
+static bool getMCRDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
+ std::string &Info) {
+ if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] &&
+ (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) &&
+ (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) &&
+ // Checks for the deprecated CP15ISB encoding:
+ // mcr p15, #0, rX, c7, c5, #4
+ (MI.getOperand(3).isImm() && MI.getOperand(3).getImm() == 7)) {
+ if ((MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 4)) {
+ if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 5) {
+ Info = "deprecated since v7, use 'isb'";
+ return true;
+ }
+
+ // Checks for the deprecated CP15DSB encoding:
+ // mcr p15, #0, rX, c7, c10, #4
+ if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10) {
+ Info = "deprecated since v7, use 'dsb'";
+ return true;
+ }
+ }
+ // Checks for the deprecated CP15DMB encoding:
+ // mcr p15, #0, rX, c7, c10, #5
+ if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10 &&
+ (MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 5)) {
+ Info = "deprecated since v7, use 'dmb'";
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool getITDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
+ std::string &Info) {
+ if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() &&
+ MI.getOperand(1).getImm() != 8) {
+ Info = "applying IT instruction to more than one subsequent instruction is "
+ "deprecated";
+ return true;
+ }
+
+ return false;
+}
+
+static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
+ std::string &Info) {
+ assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
+ "cannot predicate thumb instructions");
+
+ assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
+ for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) {
+ assert(MI.getOperand(OI).isReg() && "expected register");
+ if (MI.getOperand(OI).getReg() == ARM::SP ||
+ MI.getOperand(OI).getReg() == ARM::PC) {
+ Info = "use of SP or PC in the list is deprecated";
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool getARMLoadDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
+ std::string &Info) {
+ assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
+ "cannot predicate thumb instructions");
+
+ assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
+ bool ListContainsPC = false, ListContainsLR = false;
+ for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) {
+ assert(MI.getOperand(OI).isReg() && "expected register");
+ switch (MI.getOperand(OI).getReg()) {
+ default:
+ break;
+ case ARM::LR:
+ ListContainsLR = true;
+ break;
+ case ARM::PC:
+ ListContainsPC = true;
+ break;
+ case ARM::SP:
+ Info = "use of SP in the list is deprecated";
+ return true;
+ }
+ }
+
+ if (ListContainsPC && ListContainsLR) {
+ Info = "use of LR and PC simultaneously in the list is deprecated";
+ return true;
+ }
+
+ return false;
+}
+
+#define GET_INSTRINFO_MC_DESC
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "ARMGenSubtargetInfo.inc"
+
+std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) {
+ bool isThumb =
+ TT.getArch() == Triple::thumb || TT.getArch() == Triple::thumbeb;
+
+ std::string ARMArchFeature;
+
+ unsigned ArchID = ARM::parseArch(TT.getArchName());
+ if (ArchID != ARM::AK_INVALID && (CPU.empty() || CPU == "generic"))
+ ARMArchFeature = (ARMArchFeature + "+" + ARM::getArchName(ArchID)).str();
+
+ if (isThumb) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+thumb-mode";
+ else
+ ARMArchFeature += ",+thumb-mode";
+ }
+
+ if (TT.isOSNaCl()) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+nacl-trap";
+ else
+ ARMArchFeature += ",+nacl-trap";
+ }
+
+ return ARMArchFeature;
+}
+
+MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT,
+ StringRef CPU, StringRef FS) {
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
+ else
+ ArchFS = FS;
+ }
+
+ return createARMMCSubtargetInfoImpl(TT, CPU, ArchFS);
+}
+
+static MCInstrInfo *createARMMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitARMMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC);
+ return X;
+}
+
+static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TheTriple) {
+ MCAsmInfo *MAI;
+ if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO())
+ MAI = new ARMMCAsmInfoDarwin(TheTriple);
+ else if (TheTriple.isWindowsMSVCEnvironment())
+ MAI = new ARMCOFFMCAsmInfoMicrosoft();
+ else if (TheTriple.isOSWindows())
+ MAI = new ARMCOFFMCAsmInfoGNU();
+ else
+ MAI = new ARMELFMCAsmInfo(TheTriple);
+
+ unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true);
+ MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0));
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createARMMCCodeGenInfo(const Triple &TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default) {
+ // Default relocation model on Darwin is PIC, not DynamicNoPIC.
+ RM = TT.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
+ }
+ X->initMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
+ T.getArch() == Triple::thumb);
+}
+
+static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool DWARFMustBeAtTheEnd) {
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd);
+}
+
+static MCInstPrinter *createARMMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ if (SyntaxVariant == 0)
+ return new ARMInstPrinter(MAI, MII, MRI);
+ return nullptr;
+}
+
+static MCRelocationInfo *createARMMCRelocationInfo(const Triple &TT,
+ MCContext &Ctx) {
+ if (TT.isOSBinFormatMachO())
+ return createARMMachORelocationInfo(Ctx);
+ // Default to the stock relocation info.
+ return llvm::createMCRelocationInfo(TT, Ctx);
+}
+
+namespace {
+
+class ARMMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ ARMMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {}
+
+ bool isUnconditionalBranch(const MCInst &Inst) const override {
+ // BCCs with the "always" predicate are unconditional branches.
+ if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL)
+ return true;
+ return MCInstrAnalysis::isUnconditionalBranch(Inst);
+ }
+
+ bool isConditionalBranch(const MCInst &Inst) const override {
+ // BCCs with the "always" predicate are unconditional branches.
+ if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL)
+ return false;
+ return MCInstrAnalysis::isConditionalBranch(Inst);
+ }
+
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const override {
+ // We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
+ return false;
+
+ int64_t Imm = Inst.getOperand(0).getImm();
+ // FIXME: This is not right for thumb.
+ Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+ return true;
+ }
+};
+
+}
+
+static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new ARMMCInstrAnalysis(Info);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMTargetMC() {
+ for (Target *T : {&TheARMLETarget, &TheARMBETarget, &TheThumbLETarget,
+ &TheThumbBETarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createARMMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createARMMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createARMMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createARMMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T,
+ ARM_MC::createARMMCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis);
+
+ TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
+ TargetRegistry::RegisterCOFFStreamer(*T, createARMWinCOFFStreamer);
+ TargetRegistry::RegisterMachOStreamer(*T, createARMMachOStreamer);
+
+ // Register the obj target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createARMObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createARMTargetAsmStreamer);
+
+ // Register the null TargetStreamer.
+ TargetRegistry::RegisterNullTargetStreamer(*T, createARMNullTargetStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createARMMCInstPrinter);
+
+ // Register the MC relocation info.
+ TargetRegistry::RegisterMCRelocationInfo(*T, createARMMCRelocationInfo);
+ }
+
+ // Register the MC Code Emitter
+ for (Target *T : {&TheARMLETarget, &TheThumbLETarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter);
+ for (Target *T : {&TheARMBETarget, &TheThumbBETarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createARMBEMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheARMLETarget, createARMLEAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheARMBETarget, createARMBEAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheThumbLETarget,
+ createThumbLEAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheThumbBETarget,
+ createThumbBEAsmBackend);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
new file mode 100644
index 0000000..c2bbc8e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -0,0 +1,123 @@
+//===-- ARMMCTargetDesc.h - ARM Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+class formatted_raw_ostream;
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCInstPrinter;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class MCStreamer;
+class MCRelocationInfo;
+class MCTargetStreamer;
+class StringRef;
+class Target;
+class Triple;
+class raw_ostream;
+class raw_pwrite_stream;
+
+extern Target TheARMLETarget, TheThumbLETarget;
+extern Target TheARMBETarget, TheThumbBETarget;
+
+namespace ARM_MC {
+std::string ParseARMTriple(const Triple &TT, StringRef CPU);
+
+/// Create a ARM MCSubtargetInfo instance. This is exposed so Asm parser, etc.
+/// do not need to go through TargetRegistry.
+MCSubtargetInfo *createARMMCSubtargetInfo(const Triple &TT, StringRef CPU,
+ StringRef FS);
+}
+
+MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S);
+MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm);
+MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI);
+
+MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
+MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
+MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU,
+ bool IsLittleEndian);
+
+MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
+
+MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
+
+MCAsmBackend *createThumbLEAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
+
+MCAsmBackend *createThumbBEAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
+
+// Construct a PE/COFF machine code streamer which will generate a PE/COFF
+// object file.
+MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible);
+
+/// Construct an ELF Mach-O object writer.
+MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
+ bool IsLittleEndian);
+
+/// Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
+/// Construct an ARM PE/COFF object writer.
+MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit);
+
+/// Construct ARM Mach-O relocation info.
+MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
+} // End llvm namespace
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "ARMGenRegisterInfo.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "ARMGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
new file mode 100644
index 0000000..4468132
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -0,0 +1,43 @@
+//===-- ARMMachORelocationInfo.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "ARMMCExpr.h"
+#include "llvm-c/Disassembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRelocationInfo.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+class ARMMachORelocationInfo : public MCRelocationInfo {
+public:
+ ARMMachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {}
+
+ const MCExpr *createExprForCAPIVariantKind(const MCExpr *SubExpr,
+ unsigned VariantKind) override {
+ switch(VariantKind) {
+ case LLVMDisassembler_VariantKind_ARM_HI16:
+ return ARMMCExpr::createUpper16(SubExpr, Ctx);
+ case LLVMDisassembler_VariantKind_ARM_LO16:
+ return ARMMCExpr::createLower16(SubExpr, Ctx);
+ default:
+ return MCRelocationInfo::createExprForCAPIVariantKind(SubExpr,
+ VariantKind);
+ }
+ }
+};
+} // End unnamed namespace
+
+/// createARMMachORelocationInfo - Construct an ARM Mach-O RelocationInfo.
+MCRelocationInfo *llvm::createARMMachORelocationInfo(MCContext &Ctx) {
+ return new ARMMachORelocationInfo(Ctx);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
new file mode 100644
index 0000000..cfd504e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -0,0 +1,483 @@
+//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
+using namespace llvm;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+ void RecordARMScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Type,
+ unsigned Log2Size,
+ uint64_t &FixedValue);
+ void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
+
+ bool requiresExternRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCFragment &Fragment, unsigned RelocType,
+ const MCSymbol &S, uint64_t FixedValue);
+
+public:
+ ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
+
+ void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ const MCAsmLayout &Layout, const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) override;
+};
+}
+
+static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
+ unsigned &Log2Size) {
+ RelocType = unsigned(MachO::ARM_RELOC_VANILLA);
+ Log2Size = ~0U;
+
+ switch (Kind) {
+ default:
+ return false;
+
+ case FK_Data_1:
+ Log2Size = llvm::Log2_32(1);
+ return true;
+ case FK_Data_2:
+ Log2Size = llvm::Log2_32(2);
+ return true;
+ case FK_Data_4:
+ Log2Size = llvm::Log2_32(4);
+ return true;
+ case FK_Data_8:
+ Log2Size = llvm::Log2_32(8);
+ return true;
+
+ // These fixups are expected to always be resolvable at assembly time and
+ // have no relocations supported.
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_br:
+ return false;
+
+ // Handle 24-bit branch kinds.
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
+ RelocType = unsigned(MachO::ARM_RELOC_BR24);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22);
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ // For movw/movt r_type relocations they always have a pair following them and
+ // the r_length bits are used differently. The encoding of the r_length is as
+ // follows:
+ // low bit of r_length:
+ // 0 - :lower16: for movw instructions
+ // 1 - :upper16: for movt instructions
+ // high bit of r_length:
+ // 0 - arm instructions
+ // 1 - thumb instructions
+ case ARM::fixup_arm_movt_hi16:
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
+ Log2Size = 1;
+ return true;
+ case ARM::fixup_t2_movt_hi16:
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
+ Log2Size = 3;
+ return true;
+
+ case ARM::fixup_arm_movw_lo16:
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
+ Log2Size = 0;
+ return true;
+ case ARM::fixup_t2_movw_lo16:
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
+ Log2Size = 2;
+ return true;
+ }
+}
+
+void ARMMachObjectWriter::
+RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = MachO::ARM_RELOC_HALF;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+
+ if (!A->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+ return;
+ }
+
+ uint32_t Value = Writer->getSymbolAddress(*A, Layout);
+ uint32_t Value2 = 0;
+ uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
+ FixedValue += SecAddr;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ const MCSymbol *SB = &B->getSymbol();
+
+ if (!SB->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+ return;
+ }
+
+ // Select the appropriate difference relocation type.
+ Type = MachO::ARM_RELOC_HALF_SECTDIFF;
+ Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
+ FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
+ //
+ // For these two r_type relocations they always have a pair following them and
+ // the r_length bits are used differently. The encoding of the r_length is as
+ // follows:
+ // low bit of r_length:
+ // 0 - :lower16: for movw instructions
+ // 1 - :upper16: for movt instructions
+ // high bit of r_length:
+ // 0 - arm instructions
+ // 1 - thumb instructions
+ // the other half of the relocated expression is in the following pair
+ // relocation entry in the low 16 bits of r_address field.
+ unsigned ThumbBit = 0;
+ unsigned MovtBit = 0;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movt_hi16:
+ MovtBit = 1;
+ // The thumb bit shouldn't be set in the 'other-half' bit of the
+ // relocation, but it will be set in FixedValue if the base symbol
+ // is a thumb function. Clear it out here.
+ if (Asm.isThumbFunc(A))
+ FixedValue &= 0xfffffffe;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ if (Asm.isThumbFunc(A))
+ FixedValue &= 0xfffffffe;
+ MovtBit = 1;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ ThumbBit = 1;
+ break;
+ }
+
+ if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
+ uint32_t OtherHalf = MovtBit
+ ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
+
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((OtherHalf << 0) |
+ (MachO::ARM_RELOC_PAIR << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
+ }
+
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value;
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
+}
+
+void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Type,
+ unsigned Log2Size,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+
+ if (!A->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+ return;
+ }
+
+ uint32_t Value = Writer->getSymbolAddress(*A, Layout);
+ uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols");
+ const MCSymbol *SB = &B->getSymbol();
+
+ if (!SB->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+ return;
+ }
+
+ // Select the appropriate difference relocation type.
+ Type = MachO::ARM_RELOC_SECTDIFF;
+ Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
+ FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == MachO::ARM_RELOC_SECTDIFF ||
+ Type == MachO::ARM_RELOC_LOCAL_SECTDIFF) {
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((0 << 0) |
+ (MachO::ARM_RELOC_PAIR << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
+ }
+
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value;
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
+}
+
+bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCFragment &Fragment,
+ unsigned RelocType,
+ const MCSymbol &S,
+ uint64_t FixedValue) {
+ // Most cases can be identified purely from the symbol.
+ if (Writer->doesSymbolRequireExternRelocation(S))
+ return true;
+ int64_t Value = (int64_t)FixedValue; // The displacement is signed.
+ int64_t Range;
+ switch (RelocType) {
+ default:
+ return false;
+ case MachO::ARM_RELOC_BR24:
+ // PC pre-adjustment of 8 for these instructions.
+ Value -= 8;
+ // ARM BL/BLX has a 25-bit offset.
+ Range = 0x1ffffff;
+ break;
+ case MachO::ARM_THUMB_RELOC_BR22:
+ // PC pre-adjustment of 4 for these instructions.
+ Value -= 4;
+ // Thumb BL/BLX has a 24-bit offset.
+ Range = 0xffffff;
+ }
+ // BL/BLX also use external relocations when an internal relocation
+ // would result in the target being out of range. This gives the linker
+ // enough information to generate a branch island.
+ Value += Writer->getSectionAddress(&S.getSection());
+ Value -= Writer->getSectionAddress(Fragment.getParent());
+ // If the resultant value would be out of range for an internal relocation,
+ // use an external instead.
+ if (Value > Range || Value < -(Range + 1))
+ return true;
+ return false;
+}
+
+void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
+ MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Log2Size;
+ unsigned RelocType = MachO::ARM_RELOC_VANILLA;
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
+ // If we failed to get fixup kind info, it's because there's no legal
+ // relocation type for the fixup kind. This happens when it's a fixup that's
+ // expected to always be resolvable at assembly time and not have any
+ // relocations needed.
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported relocation on symbol");
+ return;
+ }
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB()) {
+ if (RelocType == MachO::ARM_RELOC_HALF)
+ return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
+ Fixup, Target, FixedValue);
+ return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, RelocType, Log2Size,
+ FixedValue);
+ }
+
+ // Get the symbol data, if any.
+ const MCSymbol *A = nullptr;
+ if (Target.getSymA())
+ A = &Target.getSymA()->getSymbol();
+
+ // FIXME: For other platforms, we need to use scattered relocations for
+ // internal relocations with offsets. If this is an internal relocation with
+ // an offset, it also needs a scattered relocation entry.
+ //
+ // Is this right for ARM?
+ uint32_t Offset = Target.getConstant();
+ if (IsPCRel && RelocType == MachO::ARM_RELOC_VANILLA)
+ Offset += 1 << Log2Size;
+ if (Offset && A && !Writer->doesSymbolRequireExternRelocation(*A))
+ return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, RelocType, Log2Size,
+ FixedValue);
+
+ // See <reloc.h>.
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned Index = 0;
+ unsigned Type = 0;
+ const MCSymbol *RelSymbol = nullptr;
+
+ if (Target.isAbsolute()) { // constant
+ // FIXME!
+ report_fatal_error("FIXME: relocations to absolute targets "
+ "not yet implemented");
+ } else {
+ // Resolve constant variables.
+ if (A->isVariable()) {
+ int64_t Res;
+ if (A->getVariableValue()->evaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, *A,
+ FixedValue)) {
+ RelSymbol = A;
+
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!A->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(*A);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSection &Sec = A->getSection();
+ Index = Sec.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&Sec);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+ // The type is determined by the fixup kind.
+ Type = RelocType;
+ }
+
+ // struct relocation_info (8 bytes)
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = FixupOffset;
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+
+ // Even when it's not a scattered relocation, movw/movt always uses
+ // a PAIR relocation.
+ if (Type == MachO::ARM_RELOC_HALF) {
+ // The other-half value only gets populated for the movt and movw
+ // relocation entries.
+ uint32_t Value = 0;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_t2_movw_lo16:
+ Value = (FixedValue >> 16) & 0xffff;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_t2_movt_hi16:
+ Value = FixedValue & 0xffff;
+ break;
+ }
+ MachO::any_relocation_info MREPair;
+ MREPair.r_word0 = Value;
+ MREPair.r_word1 = ((0xffffff << 0) |
+ (Log2Size << 25) |
+ (MachO::ARM_RELOC_PAIR << 28));
+
+ Writer->addRelocation(nullptr, Fragment->getParent(), MREPair);
+ }
+
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createARMMachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
new file mode 100644
index 0000000..dad50f2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -0,0 +1,74 @@
+//===- ARMTargetStreamer.cpp - ARMTargetStreamer class --*- C++ -*---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMTargetStreamer class.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/MapVector.h"
+#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+//
+// ARMTargetStreamer Implemenation
+//
+ARMTargetStreamer::ARMTargetStreamer(MCStreamer &S)
+ : MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {}
+
+ARMTargetStreamer::~ARMTargetStreamer() {}
+
+// The constant pool handling is shared by all ARMTargetStreamer
+// implementations.
+const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr, SMLoc Loc) {
+ return ConstantPools->addEntry(Streamer, Expr, 4, Loc);
+}
+
+void ARMTargetStreamer::emitCurrentConstantPool() {
+ ConstantPools->emitForCurrentSection(Streamer);
+}
+
+// finish() - write out any non-empty assembler constant pools.
+void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
+
+// The remaining callbacks should be handled separately by each
+// streamer.
+void ARMTargetStreamer::emitFnStart() {}
+void ARMTargetStreamer::emitFnEnd() {}
+void ARMTargetStreamer::emitCantUnwind() {}
+void ARMTargetStreamer::emitPersonality(const MCSymbol *Personality) {}
+void ARMTargetStreamer::emitPersonalityIndex(unsigned Index) {}
+void ARMTargetStreamer::emitHandlerData() {}
+void ARMTargetStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
+ int64_t Offset) {}
+void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {}
+void ARMTargetStreamer::emitPad(int64_t Offset) {}
+void ARMTargetStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {}
+void ARMTargetStreamer::emitUnwindRaw(int64_t StackOffset,
+ const SmallVectorImpl<uint8_t> &Opcodes) {
+}
+void ARMTargetStreamer::switchVendor(StringRef Vendor) {}
+void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {}
+void ARMTargetStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef String) {}
+void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute,
+ unsigned IntValue,
+ StringRef StringValue) {}
+void ARMTargetStreamer::emitArch(unsigned Arch) {}
+void ARMTargetStreamer::emitArchExtension(unsigned ArchExt) {}
+void ARMTargetStreamer::emitObjectArch(unsigned Arch) {}
+void ARMTargetStreamer::emitFPU(unsigned FPU) {}
+void ARMTargetStreamer::finishAttributeSection() {}
+void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
+void
+ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
+
+void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
new file mode 100644
index 0000000..173cc93
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -0,0 +1,196 @@
+//===-- ARMUnwindOpAsm.cpp - ARM Unwind Opcodes Assembler -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the unwind opcode assmebler for ARM exception handling
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMUnwindOpAsm.h"
+#include "llvm/Support/ARMEHABI.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+
+namespace {
+ /// UnwindOpcodeStreamer - The simple wrapper over SmallVector to emit bytes
+ /// with MSB to LSB per uint32_t ordering. For example, the first byte will
+ /// be placed in Vec[3], and the following bytes will be placed in 2, 1, 0,
+ /// 7, 6, 5, 4, 11, 10, 9, 8, and so on.
+ class UnwindOpcodeStreamer {
+ private:
+ SmallVectorImpl<uint8_t> &Vec;
+ size_t Pos;
+
+ public:
+ UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V), Pos(3) {
+ }
+
+ /// Emit the byte in MSB to LSB per uint32_t order.
+ inline void EmitByte(uint8_t elem) {
+ Vec[Pos] = elem;
+ Pos = (((Pos ^ 0x3u) + 1) ^ 0x3u);
+ }
+
+ /// Emit the size prefix.
+ inline void EmitSize(size_t Size) {
+ size_t SizeInWords = (Size + 3) / 4;
+ assert(SizeInWords <= 0x100u &&
+ "Only 256 additional words are allowed for unwind opcodes");
+ EmitByte(static_cast<uint8_t>(SizeInWords - 1));
+ }
+
+ /// Emit the personality index prefix.
+ inline void EmitPersonalityIndex(unsigned PI) {
+ assert(PI < ARM::EHABI::NUM_PERSONALITY_INDEX &&
+ "Invalid personality prefix");
+ EmitByte(ARM::EHABI::EHT_COMPACT | PI);
+ }
+
+ /// Fill the rest of bytes with FINISH opcode.
+ inline void FillFinishOpcode() {
+ while (Pos < Vec.size())
+ EmitByte(ARM::EHABI::UNWIND_OPCODE_FINISH);
+ }
+ };
+}
+
+void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
+ if (RegSave == 0u)
+ return;
+
+ // One byte opcode to save register r14 and r11-r4
+ if (RegSave & (1u << 4)) {
+ // The one byte opcode will always save r4, thus we can't use the one byte
+ // opcode when r4 is not in .save directive.
+
+ // Compute the consecutive registers from r4 to r11.
+ uint32_t Mask = RegSave & 0xff0u;
+ uint32_t Range = countTrailingOnes(Mask >> 5); // Exclude r4.
+ // Mask off non-consecutive registers. Keep r4.
+ Mask &= ~(0xffffffe0u << Range);
+
+ // Emit this opcode when the mask covers every registers.
+ uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
+ if (UnmaskedReg == 0u) {
+ // Pop r[4 : (4 + n)]
+ EmitInt8(ARM::EHABI::UNWIND_OPCODE_POP_REG_RANGE_R4 | Range);
+ RegSave &= 0x000fu;
+ } else if (UnmaskedReg == (1u << 14)) {
+ // Pop r[14] + r[4 : (4 + n)]
+ EmitInt8(ARM::EHABI::UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range);
+ RegSave &= 0x000fu;
+ }
+ }
+
+ // Two bytes opcode to save register r15-r4
+ if ((RegSave & 0xfff0u) != 0)
+ EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4));
+
+ // Opcode to save register r3-r0
+ if ((RegSave & 0x000fu) != 0)
+ EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu));
+}
+
+/// Emit unwind opcodes for .vsave directives
+void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
+ // We only have 4 bits to save the offset in the opcode so look at the lower
+ // and upper 16 bits separately.
+ for (uint32_t Regs : {VFPRegSave & 0xffff0000u, VFPRegSave & 0x0000ffffu}) {
+ while (Regs) {
+ // Now look for a run of set bits. Remember the MSB and LSB of the run.
+ auto RangeMSB = 32 - countLeadingZeros(Regs);
+ auto RangeLen = countLeadingOnes(Regs << (32 - RangeMSB));
+ auto RangeLSB = RangeMSB - RangeLen;
+
+ int Opcode = RangeLSB >= 16
+ ? ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16
+ : ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD;
+
+ EmitInt16(Opcode | ((RangeLSB % 16) << 4) | (RangeLen - 1));
+
+ // Zero out bits we're done with.
+ Regs &= ~(-1u << RangeLSB);
+ }
+ }
+}
+
+/// Emit unwind opcodes to copy address from source register to $sp.
+void UnwindOpcodeAssembler::EmitSetSP(uint16_t Reg) {
+ EmitInt8(ARM::EHABI::UNWIND_OPCODE_SET_VSP | Reg);
+}
+
+/// Emit unwind opcodes to add $sp with an offset.
+void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) {
+ if (Offset > 0x200) {
+ uint8_t Buff[16];
+ Buff[0] = ARM::EHABI::UNWIND_OPCODE_INC_VSP_ULEB128;
+ size_t ULEBSize = encodeULEB128((Offset - 0x204) >> 2, Buff + 1);
+ EmitBytes(Buff, ULEBSize + 1);
+ } else if (Offset > 0) {
+ if (Offset > 0x100) {
+ EmitInt8(ARM::EHABI::UNWIND_OPCODE_INC_VSP | 0x3fu);
+ Offset -= 0x100;
+ }
+ EmitInt8(ARM::EHABI::UNWIND_OPCODE_INC_VSP |
+ static_cast<uint8_t>((Offset - 4) >> 2));
+ } else if (Offset < 0) {
+ while (Offset < -0x100) {
+ EmitInt8(ARM::EHABI::UNWIND_OPCODE_DEC_VSP | 0x3fu);
+ Offset += 0x100;
+ }
+ EmitInt8(ARM::EHABI::UNWIND_OPCODE_DEC_VSP |
+ static_cast<uint8_t>(((-Offset) - 4) >> 2));
+ }
+}
+
+void UnwindOpcodeAssembler::Finalize(unsigned &PersonalityIndex,
+ SmallVectorImpl<uint8_t> &Result) {
+
+ UnwindOpcodeStreamer OpStreamer(Result);
+
+ if (HasPersonality) {
+ // User-specifed personality routine: [ SIZE , OP1 , OP2 , ... ]
+ PersonalityIndex = ARM::EHABI::NUM_PERSONALITY_INDEX;
+ size_t TotalSize = Ops.size() + 1;
+ size_t RoundUpSize = (TotalSize + 3) / 4 * 4;
+ Result.resize(RoundUpSize);
+ OpStreamer.EmitSize(RoundUpSize);
+ } else {
+ // If no personalityindex is specified, select ane
+ if (PersonalityIndex == ARM::EHABI::NUM_PERSONALITY_INDEX)
+ PersonalityIndex = (Ops.size() <= 3) ? ARM::EHABI::AEABI_UNWIND_CPP_PR0
+ : ARM::EHABI::AEABI_UNWIND_CPP_PR1;
+ if (PersonalityIndex == ARM::EHABI::AEABI_UNWIND_CPP_PR0) {
+ // __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ]
+ assert(Ops.size() <= 3 && "too many opcodes for __aeabi_unwind_cpp_pr0");
+ Result.resize(4);
+ OpStreamer.EmitPersonalityIndex(PersonalityIndex);
+ } else {
+ // __aeabi_unwind_cpp_pr{1,2}: [ {0x81,0x82} , SIZE , OP1 , OP2 , ... ]
+ size_t TotalSize = Ops.size() + 2;
+ size_t RoundUpSize = (TotalSize + 3) / 4 * 4;
+ Result.resize(RoundUpSize);
+ OpStreamer.EmitPersonalityIndex(PersonalityIndex);
+ OpStreamer.EmitSize(RoundUpSize);
+ }
+ }
+
+ // Copy the unwind opcodes
+ for (size_t i = OpBegins.size() - 1; i > 0; --i)
+ for (size_t j = OpBegins[i - 1], end = OpBegins[i]; j < end; ++j)
+ OpStreamer.EmitByte(Ops[j]);
+
+ // Emit the padding finish opcodes if the size is not multiple of 4.
+ OpStreamer.FillFinishOpcode();
+
+ // Reset the assembler state
+ Reset();
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
new file mode 100644
index 0000000..e0c113e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -0,0 +1,93 @@
+//===-- ARMUnwindOpAsm.h - ARM Unwind Opcodes Assembler ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the unwind opcode assmebler for ARM exception handling
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ARMEHABI.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCSymbol;
+
+class UnwindOpcodeAssembler {
+private:
+ llvm::SmallVector<uint8_t, 32> Ops;
+ llvm::SmallVector<unsigned, 8> OpBegins;
+ bool HasPersonality;
+
+public:
+ UnwindOpcodeAssembler()
+ : HasPersonality(0) {
+ OpBegins.push_back(0);
+ }
+
+ /// Reset the unwind opcode assembler.
+ void Reset() {
+ Ops.clear();
+ OpBegins.clear();
+ OpBegins.push_back(0);
+ HasPersonality = 0;
+ }
+
+ /// Set the personality
+ void setPersonality(const MCSymbol *Per) {
+ HasPersonality = 1;
+ }
+
+ /// Emit unwind opcodes for .save directives
+ void EmitRegSave(uint32_t RegSave);
+
+ /// Emit unwind opcodes for .vsave directives
+ void EmitVFPRegSave(uint32_t VFPRegSave);
+
+ /// Emit unwind opcodes to copy address from source register to $sp.
+ void EmitSetSP(uint16_t Reg);
+
+ /// Emit unwind opcodes to add $sp with an offset.
+ void EmitSPOffset(int64_t Offset);
+
+ /// Emit unwind raw opcodes
+ void EmitRaw(const SmallVectorImpl<uint8_t> &Opcodes) {
+ Ops.insert(Ops.end(), Opcodes.begin(), Opcodes.end());
+ OpBegins.push_back(OpBegins.back() + Opcodes.size());
+ }
+
+ /// Finalize the unwind opcode sequence for EmitBytes()
+ void Finalize(unsigned &PersonalityIndex,
+ SmallVectorImpl<uint8_t> &Result);
+
+private:
+ void EmitInt8(unsigned Opcode) {
+ Ops.push_back(Opcode & 0xff);
+ OpBegins.push_back(OpBegins.back() + 1);
+ }
+
+ void EmitInt16(unsigned Opcode) {
+ Ops.push_back((Opcode >> 8) & 0xff);
+ Ops.push_back(Opcode & 0xff);
+ OpBegins.push_back(OpBegins.back() + 2);
+ }
+
+ void EmitBytes(const uint8_t *Opcode, size_t Size) {
+ Ops.insert(Ops.end(), Opcode, Opcode + Size);
+ OpBegins.push_back(OpBegins.back() + Size);
+ }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
new file mode 100644
index 0000000..166c04b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -0,0 +1,91 @@
+//===-- ARMWinCOFFObjectWriter.cpp - ARM Windows COFF Object Writer -- C++ -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
+public:
+ ARMWinCOFFObjectWriter(bool Is64Bit)
+ : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) {
+ assert(!Is64Bit && "AArch64 support not yet implemented");
+ }
+ ~ARMWinCOFFObjectWriter() override {}
+
+ unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const override;
+
+ bool recordRelocation(const MCFixup &) const override;
+};
+
+unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const {
+ assert(getMachine() == COFF::IMAGE_FILE_MACHINE_ARMNT &&
+ "AArch64 support not yet implemented");
+
+ MCSymbolRefExpr::VariantKind Modifier =
+ Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ switch (static_cast<unsigned>(Fixup.getKind())) {
+ default: {
+ const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind());
+ report_fatal_error(Twine("unsupported relocation type: ") + Info.Name);
+ }
+ case FK_Data_4:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_COFF_IMGREL32:
+ return COFF::IMAGE_REL_ARM_ADDR32NB;
+ case MCSymbolRefExpr::VK_SECREL:
+ return COFF::IMAGE_REL_ARM_SECREL;
+ default:
+ return COFF::IMAGE_REL_ARM_ADDR32;
+ }
+ case FK_SecRel_2:
+ return COFF::IMAGE_REL_ARM_SECTION;
+ case FK_SecRel_4:
+ return COFF::IMAGE_REL_ARM_SECREL;
+ case ARM::fixup_t2_condbranch:
+ return COFF::IMAGE_REL_ARM_BRANCH20T;
+ case ARM::fixup_t2_uncondbranch:
+ return COFF::IMAGE_REL_ARM_BRANCH24T;
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ return COFF::IMAGE_REL_ARM_BLX23T;
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16:
+ return COFF::IMAGE_REL_ARM_MOV32T;
+ }
+}
+
+bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
+ return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16;
+}
+}
+
+namespace llvm {
+MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit) {
+ MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit);
+ return createWinCOFFObjectWriter(MOTW, OS);
+}
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
new file mode 100644
index 0000000..83fa084
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -0,0 +1,47 @@
+//===-- ARMWinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCTargetDesc.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
+
+using namespace llvm;
+
+namespace {
+class ARMWinCOFFStreamer : public MCWinCOFFStreamer {
+public:
+ ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE,
+ raw_pwrite_stream &OS)
+ : MCWinCOFFStreamer(C, AB, CE, OS) {}
+
+ void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
+ void EmitThumbFunc(MCSymbol *Symbol) override;
+};
+
+void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default: llvm_unreachable("not implemented");
+ case MCAF_SyntaxUnified:
+ case MCAF_Code16:
+ break;
+ }
+}
+
+void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
+ getAssembler().setIsThumbFunc(Symbol);
+}
+}
+
+MCStreamer *llvm::createARMWinCOFFStreamer(
+ MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll, bool IncrementalLinkerCompatible) {
+ auto *S = new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
+ S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
+ return S;
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
new file mode 100644
index 0000000..ed2deea
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -0,0 +1,400 @@
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
+// multiple and add / sub instructions) when special VMLx hazards are detected.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "mlx-expansion"
+
+static cl::opt<bool>
+ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
+static cl::opt<unsigned>
+ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
+
+STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
+
+namespace {
+ struct MLxExpansion : public MachineFunctionPass {
+ static char ID;
+ MLxExpansion() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ const char *getPassName() const override {
+ return "ARM MLA / MLS expansion pass";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ bool isLikeA9;
+ bool isSwift;
+ unsigned MIIdx;
+ MachineInstr* LastMIs[4];
+ SmallPtrSet<MachineInstr*, 4> IgnoreStall;
+
+ void clearStack();
+ void pushStack(MachineInstr *MI);
+ MachineInstr *getAccDefMI(MachineInstr *MI) const;
+ unsigned getDefReg(MachineInstr *MI) const;
+ bool hasLoopHazard(MachineInstr *MI) const;
+ bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
+ bool FindMLxHazard(MachineInstr *MI);
+ void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane);
+ bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
+ };
+ char MLxExpansion::ID = 0;
+}
+
+void MLxExpansion::clearStack() {
+ std::fill(LastMIs, LastMIs + 4, nullptr);
+ MIIdx = 0;
+}
+
+void MLxExpansion::pushStack(MachineInstr *MI) {
+ LastMIs[MIIdx] = MI;
+ if (++MIIdx == 4)
+ MIIdx = 0;
+}
+
+MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
+ // Look past COPY and INSERT_SUBREG instructions to find the
+ // real definition MI. This is important for _sfp instructions.
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return nullptr;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+ if (DefMI->getParent() != MBB)
+ break;
+ if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+ break;
+ }
+ return DefMI;
+}
+
+unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *UseMI = &*MRI->use_instr_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+
+ while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
+ Reg = UseMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+ UseMI = &*MRI->use_instr_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+ }
+
+ return Reg;
+}
+
+/// hasLoopHazard - Check whether an MLx instruction is chained to itself across
+/// a single-MBB loop.
+bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return false;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+outer_continue:
+ if (DefMI->getParent() != MBB)
+ break;
+
+ if (DefMI->isPHI()) {
+ for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
+ if (DefMI->getOperand(i + 1).getMBB() == MBB) {
+ unsigned SrcReg = DefMI->getOperand(i).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DefMI = MRI->getVRegDef(SrcReg);
+ goto outer_continue;
+ }
+ }
+ }
+ } else if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ return DefMI == MI;
+}
+
+bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
+ // FIXME: Detect integer instructions properly.
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (MI->mayStore())
+ return false;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(Reg, TRI);
+ return false;
+}
+
+static bool isFpMulInstruction(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::VMULS:
+ case ARM::VMULfd:
+ case ARM::VMULfq:
+ case ARM::VMULD:
+ case ARM::VMULslfd:
+ case ARM::VMULslfq:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
+ if (NumExpand >= ExpandLimit)
+ return false;
+
+ if (ForceExapnd)
+ return true;
+
+ MachineInstr *DefMI = getAccDefMI(MI);
+ if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
+ // r0 = vmla
+ // r3 = vmla r0, r1, r2
+ // takes 16 - 17 cycles
+ //
+ // r0 = vmla
+ // r4 = vmul r1, r2
+ // r3 = vadd r0, r4
+ // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
+ IgnoreStall.insert(DefMI);
+ return true;
+ }
+
+ // On Swift, we mostly care about hazards from multiplication instructions
+ // writing the accumulator and the pipelining of loop iterations by out-of-
+ // order execution.
+ if (isSwift)
+ return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
+
+ if (IgnoreStall.count(MI))
+ return false;
+
+ // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
+ // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
+ // preserves the in-order retirement of the instructions.
+ // Look at the next few instructions, if *most* of them can cause hazards,
+ // then the scheduler can't *fix* this, we'd better break up the VMLA.
+ unsigned Limit1 = isLikeA9 ? 1 : 4;
+ unsigned Limit2 = isLikeA9 ? 1 : 4;
+ for (unsigned i = 1; i <= 4; ++i) {
+ int Idx = ((int)MIIdx - i + 4) % 4;
+ MachineInstr *NextMI = LastMIs[Idx];
+ if (!NextMI)
+ continue;
+
+ if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
+ if (i <= Limit1)
+ return true;
+ }
+
+ // Look for VMLx RAW hazard.
+ if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
+ return true;
+ }
+
+ return false;
+}
+
+/// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair
+/// of MUL + ADD / SUB instructions.
+void
+MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool DstDead = MI->getOperand(0).isDead();
+ unsigned AccReg = MI->getOperand(1).getReg();
+ unsigned Src1Reg = MI->getOperand(2).getReg();
+ unsigned Src2Reg = MI->getOperand(3).getReg();
+ bool Src1Kill = MI->getOperand(2).isKill();
+ bool Src2Kill = MI->getOperand(3).isKill();
+ unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
+ unsigned NextOp = HasLane ? 5 : 4;
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
+ unsigned PredReg = MI->getOperand(++NextOp).getReg();
+
+ const MCInstrDesc &MCID1 = TII->get(MulOpc);
+ const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ unsigned TmpReg = MRI->createVirtualRegister(
+ TII->getRegClass(MCID1, 0, TRI, MF));
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
+ .addReg(Src1Reg, getKillRegState(Src1Kill))
+ .addReg(Src2Reg, getKillRegState(Src2Kill));
+ if (HasLane)
+ MIB.addImm(LaneImm);
+ MIB.addImm(Pred).addReg(PredReg);
+
+ MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
+
+ if (NegAcc) {
+ bool AccKill = MRI->hasOneNonDBGUse(AccReg);
+ MIB.addReg(TmpReg, getKillRegState(true))
+ .addReg(AccReg, getKillRegState(AccKill));
+ } else {
+ MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
+ }
+ MIB.addImm(Pred).addReg(PredReg);
+
+ DEBUG({
+ dbgs() << "Expanding: " << *MI;
+ dbgs() << " to:\n";
+ MachineBasicBlock::iterator MII = MI;
+ MII = std::prev(MII);
+ MachineInstr &MI2 = *MII;
+ MII = std::prev(MII);
+ MachineInstr &MI1 = *MII;
+ dbgs() << " " << MI1;
+ dbgs() << " " << MI2;
+ });
+
+ MI->eraseFromParent();
+ ++NumExpand;
+}
+
+bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ clearStack();
+ IgnoreStall.clear();
+
+ unsigned Skip = 0;
+ MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
+ while (MII != E) {
+ MachineInstr *MI = &*MII;
+
+ if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy()) {
+ ++MII;
+ continue;
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->isBarrier()) {
+ clearStack();
+ Skip = 0;
+ ++MII;
+ continue;
+ }
+
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (Domain == ARMII::DomainGeneral) {
+ if (++Skip == 2)
+ // Assume dual issues of non-VFP / NEON instructions.
+ pushStack(nullptr);
+ } else {
+ Skip = 0;
+
+ unsigned MulOpc, AddSubOpc;
+ bool NegAcc, HasLane;
+ if (!TII->isFpMLxInstruction(MCID.getOpcode(),
+ MulOpc, AddSubOpc, NegAcc, HasLane) ||
+ !FindMLxHazard(MI))
+ pushStack(MI);
+ else {
+ ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
+ E = MBB.rend(); // May have changed if MI was the 1st instruction.
+ Changed = true;
+ continue;
+ }
+ }
+
+ ++MII;
+ }
+
+ return Changed;
+}
+
+bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ TRI = Fn.getSubtarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>();
+ // Only run this for CortexA9.
+ if (!STI->isCortexA9())
+ return false;
+ isLikeA9 = STI->isLikeA9() || STI->isSwift();
+ isSwift = STI->isSwift();
+
+ bool Modified = false;
+ for (MachineBasicBlock &MBB : Fn)
+ Modified |= ExpandFPMLxInstructions(MBB);
+
+ return Modified;
+}
+
+FunctionPass *llvm::createMLxExpansionPass() {
+ return new MLxExpansion();
+}
diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
new file mode 100644
index 0000000..df73554
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -0,0 +1,28 @@
+//===-- ARMTargetInfo.cpp - ARM Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheARMLETarget, llvm::TheARMBETarget;
+Target llvm::TheThumbLETarget, llvm::TheThumbBETarget;
+
+extern "C" void LLVMInitializeARMTargetInfo() {
+ RegisterTarget<Triple::arm, /*HasJIT=*/true>
+ X(TheARMLETarget, "arm", "ARM");
+ RegisterTarget<Triple::armeb, /*HasJIT=*/true>
+ Y(TheARMBETarget, "armeb", "ARM (big endian)");
+
+ RegisterTarget<Triple::thumb, /*HasJIT=*/true>
+ A(TheThumbLETarget, "thumb", "Thumb");
+ RegisterTarget<Triple::thumbeb, /*HasJIT=*/true>
+ B(TheThumbBETarget, "thumbeb", "Thumb (big endian)");
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
new file mode 100644
index 0000000..93e0ac4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -0,0 +1,659 @@
+//===-- Thumb1FrameLowering.cpp - Thumb1 Frame Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb1 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1FrameLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
+ : ARMFrameLowering(sti) {}
+
+bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has small immediate offset to
+ // address the stack frame. So a large call frame can cause poor codegen
+ // and may even makes it impossible to scavenge a register.
+ if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static void
+emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, DebugLoc dl,
+ const ThumbRegisterInfo &MRI,
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI, MIFlags);
+}
+
+
+void Thumb1FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
+ } else {
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
+
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ assert(NumBytes >= ArgRegsSaveSize &&
+ "ArgRegsSaveSize is included in NumBytes");
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned BasePtr = RegInfo->getBaseRegister();
+ int CFAOffset = 0;
+
+ // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+ NumBytes = (NumBytes + 3) & ~3;
+ MFI->setStackSize(NumBytes);
+
+ // Determine the sizes of each callee-save spill areas and record which frame
+ // belongs to which callee-save spill areas.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ if (ArgRegsSaveSize) {
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
+ MachineInstr::FrameSetup);
+ CFAOffset -= ArgRegsSaveSize;
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes - ArgRegsSaveSize != 0) {
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize),
+ MachineInstr::FrameSetup);
+ CFAOffset -= NumBytes - ArgRegsSaveSize;
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (STI.isTargetMachO()) {
+ GPRCS2Size += 4;
+ break;
+ }
+ // fallthrough
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ GPRCS1Size += 4;
+ break;
+ default:
+ DPRCSSize += 8;
+ }
+ }
+
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ ++MBBI;
+ }
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ bool HasFP = hasFP(MF);
+ if (HasFP)
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+ NumBytes = DPRCSOffset;
+
+ int FramePtrOffsetInBlock = 0;
+ unsigned adjustedGPRCS1Size = GPRCS1Size;
+ if (tryFoldSPUpdateIntoPushPop(STI, MF, std::prev(MBBI), NumBytes)) {
+ FramePtrOffsetInBlock = NumBytes;
+ adjustedGPRCS1Size += NumBytes;
+ NumBytes = 0;
+ }
+
+ if (adjustedGPRCS1Size) {
+ CFAOffset -= adjustedGPRCS1Size;
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ unsigned Reg = I->getReg();
+ int FI = I->getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12:
+ if (STI.isTargetMachO())
+ break;
+ // fallthough
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
+ }
+
+ // Adjust FP so it point to the stack slot that contains the previous FP.
+ if (HasFP) {
+ FramePtrOffsetInBlock +=
+ MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
+ .setMIFlags(MachineInstr::FrameSetup));
+ if(FramePtrOffsetInBlock) {
+ CFAOffset += FramePtrOffsetInBlock;
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ } else {
+ unsigned CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true)));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ if (NumBytes > 508)
+ // If offset is > 508 then sp cannot be adjusted in a single instruction,
+ // try restoring from fp instead.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (NumBytes) {
+ // Insert it after all the callee-save spills.
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+ MachineInstr::FrameSetup);
+ if (!HasFP) {
+ CFAOffset -= NumBytes;
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+
+ if (STI.isTargetELF() && HasFP)
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // Thumb1 does not currently support dynamic stack realignment. Report a
+ // fatal error rather then silently generate bad code.
+ if (RegInfo->needsStackRealignment(MF))
+ report_fatal_error("Dynamic stack realignment not supported for thumb1.");
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (RegInfo->hasBasePointer(MF))
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
+ .addReg(ARM::SP));
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
+
+static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
+ if (MI->getOpcode() == ARM::tLDRspi &&
+ MI->getOperand(1).isFI() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
+ return true;
+ else if (MI->getOpcode() == ARM::tPOP) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
+
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ assert((unsigned)NumBytes >= ArgRegsSaveSize &&
+ "ArgRegsSaveSize is included in NumBytes");
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes - ArgRegsSaveSize != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+ if (!isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize() +
+ ArgRegsSaveSize);
+
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot, the target is ELF and the function has FP, or
+ // the target uses var sized objects.
+ if (NumBytes) {
+ assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ TII, *RegInfo);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(ARM::R4));
+ } else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(FramePtr));
+ } else {
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
+ MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
+ if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes))
+ emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+ } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes))
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ }
+ }
+
+ if (needPopSpecialFixUp(MF)) {
+ bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true);
+ (void)Done;
+ assert(Done && "Emission of the special fixup failed!?");
+ }
+}
+
+bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
+ if (!needPopSpecialFixUp(*MBB.getParent()))
+ return true;
+
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+ return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false);
+}
+
+bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
+ ARMFunctionInfo *AFI =
+ const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>();
+ if (AFI->getArgRegsSaveSize())
+ return true;
+
+ // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up.
+ for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo())
+ if (CSI.getReg() == ARM::LR)
+ return true;
+
+ return false;
+}
+
+bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
+ bool DoIt) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
+
+ // If MBBI is a return instruction, or is a tPOP followed by a return
+ // instruction in the successor BB, we may be able to directly restore
+ // LR in the PC.
+ // This is only possible with v5T ops (v4T can't change the Thumb bit via
+ // a POP PC instruction), and only if we do not need to emit any SP update.
+ // Otherwise, we need a temporary register to pop the value
+ // and copy that value into LR.
+ auto MBBI = MBB.getFirstTerminator();
+ bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize;
+ if (CanRestoreDirectly) {
+ if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB)
+ CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET);
+ else {
+ auto MBBI_prev = MBBI;
+ MBBI_prev--;
+ assert(MBBI_prev->getOpcode() == ARM::tPOP);
+ assert(MBB.succ_size() == 1);
+ if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET)
+ MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET.
+ else
+ CanRestoreDirectly = false;
+ }
+ }
+
+ if (CanRestoreDirectly) {
+ if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET)
+ return true;
+ MachineInstrBuilder MIB =
+ AddDefaultPred(
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)));
+ // Copy implicit ops and popped registers, if any.
+ for (auto MO: MBBI->operands())
+ if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
+ MIB.addOperand(MO);
+ MIB.addReg(ARM::PC, RegState::Define);
+ // Erase the old instruction (tBX_RET or tPOP).
+ MBB.erase(MBBI);
+ return true;
+ }
+
+ // Look for a temporary register to use.
+ // First, compute the liveness information.
+ LivePhysRegs UsedRegs(STI.getRegisterInfo());
+ UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true);
+ // The semantic of pristines changed recently and now,
+ // the callee-saved registers that are touched in the function
+ // are not part of the pristines set anymore.
+ // Add those callee-saved now.
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ UsedRegs.addReg(CSRegs[i]);
+
+ DebugLoc dl = DebugLoc();
+ if (MBBI != MBB.end()) {
+ dl = MBBI->getDebugLoc();
+ auto InstUpToMBBI = MBB.end();
+ while (InstUpToMBBI != MBBI)
+ // The pre-decrement is on purpose here.
+ // We want to have the liveness right before MBBI.
+ UsedRegs.stepBackward(*--InstUpToMBBI);
+ }
+
+ // Look for a register that can be directly use in the POP.
+ unsigned PopReg = 0;
+ // And some temporary register, just in case.
+ unsigned TemporaryReg = 0;
+ BitVector PopFriendly =
+ TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID));
+ assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
+ // Rebuild the GPRs from the high registers because they are removed
+ // form the GPR reg class for thumb1.
+ BitVector GPRsNoLRSP =
+ TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID));
+ GPRsNoLRSP |= PopFriendly;
+ GPRsNoLRSP.reset(ARM::LR);
+ GPRsNoLRSP.reset(ARM::SP);
+ GPRsNoLRSP.reset(ARM::PC);
+ for (int Register = GPRsNoLRSP.find_first(); Register != -1;
+ Register = GPRsNoLRSP.find_next(Register)) {
+ if (!UsedRegs.contains(Register)) {
+ // Remember the first pop-friendly register and exit.
+ if (PopFriendly.test(Register)) {
+ PopReg = Register;
+ TemporaryReg = 0;
+ break;
+ }
+ // Otherwise, remember that the register will be available to
+ // save a pop-friendly register.
+ TemporaryReg = Register;
+ }
+ }
+
+ if (!DoIt && !PopReg && !TemporaryReg)
+ return false;
+
+ assert((PopReg || TemporaryReg) && "Cannot get LR");
+
+ if (TemporaryReg) {
+ assert(!PopReg && "Unnecessary MOV is about to be inserted");
+ PopReg = PopFriendly.find_first();
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(TemporaryReg, RegState::Define)
+ .addReg(PopReg, RegState::Kill));
+ }
+
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
+ // We couldn't use the direct restoration above, so
+ // perform the opposite conversion: tPOP_RET to tPOP.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)));
+ bool Popped = false;
+ for (auto MO: MBBI->operands())
+ if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
+ MO.getReg() != ARM::PC) {
+ MIB.addOperand(MO);
+ if (!MO.isImplicit())
+ Popped = true;
+ }
+ // Is there anything left to pop?
+ if (!Popped)
+ MBB.erase(MIB.getInstr());
+ // Erase the old instruction.
+ MBB.erase(MBBI);
+ MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)));
+ }
+
+ assert(PopReg && "Do not know how to get LR");
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(PopReg, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(PopReg, RegState::Kill));
+
+ if (TemporaryReg)
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(PopReg, RegState::Define)
+ .addReg(TemporaryReg, RegState::Kill));
+
+ return true;
+}
+
+bool Thumb1FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
+ AddDefaultPred(MIB);
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ bool isKill = true;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+ // then it's already added to the function and entry block live-in sets.
+ if (Reg == ARM::LR) {
+ MachineFunction &MF = *MBB.getParent();
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ MIB.addReg(Reg, getKillRegState(isKill));
+ }
+ MIB.setMIFlags(MachineInstr::FrameSetup);
+ return true;
+}
+
+bool Thumb1FrameLowering::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ bool isVarArg = AFI->getArgRegsSaveSize() > 0;
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
+ AddDefaultPred(MIB);
+
+ bool NeedsPop = false;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (Reg == ARM::LR) {
+ if (MBB.succ_empty()) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
+ // ARMv4T requires BX, see emitEpilogue
+ if (!STI.hasV5TOps())
+ continue;
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ if (MI != MBB.end())
+ MIB.copyImplicitOps(&*MI);
+ MI = MBB.erase(MI);
+ } else
+ // LR may only be popped into PC, as part of return sequence.
+ // If this isn't the return sequence, we'll need emitPopSpecialFixUp
+ // to restore LR the hard way.
+ continue;
+ }
+ MIB.addReg(Reg, getDefRegState(true));
+ NeedsPop = true;
+ }
+
+ // It's illegal to emit pop instruction without operands.
+ if (NeedsPop)
+ MBB.insert(MI, &*MIB);
+ else
+ MF.DeleteMachineInstr(MIB);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
new file mode 100644
index 0000000..812f983
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -0,0 +1,88 @@
+//===-- Thumb1FrameLowering.h - Thumb1-specific frame info stuff --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H
+#define LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H
+
+#include "ARMFrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "ThumbRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class Thumb1FrameLowering : public ARMFrameLowering {
+public:
+ explicit Thumb1FrameLowering(const ARMSubtarget &sti);
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
+
+ void
+ eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
+
+ /// Check whether or not the given \p MBB can be used as a epilogue
+ /// for the target.
+ /// The epilogue will be inserted before the first terminator of that block.
+ /// This method is used by the shrink-wrapping pass to decide if
+ /// \p MBB will be correctly handled by the target.
+ bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+
+private:
+ /// Check if the frame lowering of \p MF needs a special fixup
+ /// code sequence for the epilogue.
+ /// Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+ /// to LR, and we can't pop the value directly to the PC when
+ /// we need to update the SP after popping the value. So instead
+ /// we have to emit:
+ /// POP {r3}
+ /// ADD sp, #offset
+ /// BX r3
+ /// If this would clobber a return value, then generate this sequence instead:
+ /// MOV ip, r3
+ /// POP {r3}
+ /// ADD sp, #offset
+ /// MOV lr, r3
+ /// MOV r3, ip
+ /// BX lr
+ bool needPopSpecialFixUp(const MachineFunction &MF) const;
+
+ /// Emit the special fixup code sequence for the epilogue.
+ /// \see needPopSpecialFixUp for more details.
+ /// \p DoIt, tells this method whether or not to actually insert
+ /// the code sequence in \p MBB. I.e., when \p DoIt is false,
+ /// \p MBB is left untouched.
+ /// \returns For \p DoIt == true: True when the emission succeeded
+ /// false otherwise. For \p DoIt == false: True when the emission
+ /// would have been possible, false otherwise.
+ bool emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
new file mode 100644
index 0000000..530e1d3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -0,0 +1,128 @@
+//===-- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI() {}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(ARM::tMOVr);
+ NopInst.addOperand(MCOperand::createReg(ARM::R8));
+ NopInst.addOperand(MCOperand::createReg(ARM::R8));
+ NopInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::createReg(0));
+}
+
+unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ return 0;
+}
+
+void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // Need to check the arch.
+ MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &st = MF.getSubtarget<ARMSubtarget>();
+
+ assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
+ "Thumb1 can only copy GPR registers");
+
+ if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
+ || !ARM::tGPRRegClass.contains(DestReg))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
+ else {
+ // FIXME: The performance consequences of this are going to be atrocious.
+ // Some things to try that should be better:
+ // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11
+ // * 'movs $dst, $src' if cpsr isn't live
+ // See: http://lists.llvm.org/pipermail/llvm-dev/2014-August/075998.html
+
+ // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH)))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP)))
+ .addReg(DestReg, getDefRegState(true));
+ }
+}
+
+void Thumb1InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert((RC == &ARM::tGPRRegClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) && "Unknown regclass!");
+
+ if (RC == &ARM::tGPRRegClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ }
+}
+
+void Thumb1InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert((RC == &ARM::tGPRRegClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) && "Unknown regclass!");
+
+ if (RC == &ARM::tGPRRegClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ }
+}
+
+void
+Thumb1InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const {
+ if (RM == Reloc::PIC_)
+ expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi, RM);
+ else
+ expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi, RM);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
new file mode 100644
index 0000000..f3f493d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -0,0 +1,63 @@
+//===-- Thumb1InstrInfo.h - Thumb-1 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H
+#define LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H
+
+#include "ARMBaseInstrInfo.h"
+#include "ThumbRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class Thumb1InstrInfo : public ARMBaseInstrInfo {
+ ThumbRegisterInfo RI;
+public:
+ explicit Thumb1InstrInfo(const ARMSubtarget &STI);
+
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const override;
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const override;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+private:
+ void expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const override;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
new file mode 100644
index 0000000..bf0498d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -0,0 +1,299 @@
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb-2 IT blocks ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "thumb2-it"
+
+STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
+
+namespace {
+ class Thumb2ITBlockPass : public MachineFunctionPass {
+ public:
+ static char ID;
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+
+ bool restrictIT;
+ const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ARMFunctionInfo *AFI;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ const char *getPassName() const override {
+ return "Thumb IT blocks insertion pass";
+ }
+
+ private:
+ bool MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses);
+ bool InsertITInstructions(MachineBasicBlock &MBB);
+ };
+ char Thumb2ITBlockPass::ID = 0;
+}
+
+/// TrackDefUses - Tracking what registers are being defined and used by
+/// instructions in the IT block. This also tracks "dependencies", i.e. uses
+/// in the IT block that are defined before the IT instruction.
+static void TrackDefUses(MachineInstr *MI,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallVector<unsigned, 4> LocalUses;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
+ continue;
+ if (MO.isUse())
+ LocalUses.push_back(Reg);
+ else
+ LocalDefs.push_back(Reg);
+ }
+
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
+ Subreg.isValid(); ++Subreg)
+ Uses.insert(*Subreg);
+ }
+
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
+ Subreg.isValid(); ++Subreg)
+ Defs.insert(*Subreg);
+ if (Reg == ARM::CPSR)
+ continue;
+ }
+}
+
+/// Clear kill flags for any uses in the given set. This will likely
+/// conservatively remove more kill flags than are necessary, but removing them
+/// is safer than incorrect kill flags remaining on instructions.
+static void ClearKillFlags(MachineInstr *MI, SmallSet<unsigned, 4> &Uses) {
+ for (MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.isDef() || !MO.isKill())
+ continue;
+ if (!Uses.count(MO.getReg()))
+ continue;
+ MO.setIsKill(false);
+ }
+}
+
+static bool isCopy(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case ARM::MOVr:
+ case ARM::MOVr_TC:
+ case ARM::tMOVr:
+ case ARM::t2MOVr:
+ return true;
+ }
+}
+
+bool
+Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses) {
+ if (!isCopy(MI))
+ return false;
+ // llvm models select's as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects we would end up creating multiple IT blocks.
+ assert(MI->getOperand(0).getSubReg() == 0 &&
+ MI->getOperand(1).getSubReg() == 0 &&
+ "Sub-register indices still around?");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // If the CPSR is defined by this copy, then we don't want to move it. E.g.,
+ // if we have:
+ //
+ // movs r1, r1
+ // rsb r1, 0
+ // movs r2, r2
+ // rsb r2, 0
+ //
+ // we don't want this to be converted to:
+ //
+ // movs r1, r1
+ // movs r2, r2
+ // itt mi
+ // rsb r1, 0
+ // rsb r2, 0
+ //
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->hasOptionalDef() &&
+ MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
+ }
+ return false;
+}
+
+bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ SmallSet<unsigned, 4> Defs;
+ SmallSet<unsigned, 4> Uses;
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineInstr *MI = &*MBBI;
+ DebugLoc dl = MI->getDebugLoc();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
+ if (CC == ARMCC::AL) {
+ ++MBBI;
+ continue;
+ }
+
+ Defs.clear();
+ Uses.clear();
+ TrackDefUses(MI, Defs, Uses, TRI);
+
+ // Insert an IT instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
+ .addImm(CC);
+
+ // Add implicit use of ITSTATE to IT block instructions.
+ MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+
+ MachineInstr *LastITMI = MI;
+ MachineBasicBlock::iterator InsertPos = MIB.getInstr();
+ ++MBBI;
+
+ // Form IT block.
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ unsigned Mask = 0, Pos = 3;
+
+ // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it
+ // is set: skip the loop
+ if (!restrictIT) {
+ // Branches, including tricky ones like LDM_RET, need to end an IT
+ // block so check the instruction we just put in the block.
+ for (; MBBI != E && Pos &&
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ MI = NMI;
+
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
+ Mask |= (NCC & 1) << Pos;
+ // Add implicit use of ITSTATE.
+ NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ClearKillFlags(MI, Uses);
+ ++NumMovedInsts;
+ continue;
+ }
+ break;
+ }
+ TrackDefUses(NMI, Defs, Uses, TRI);
+ --Pos;
+ }
+ }
+
+ // Finalize IT mask.
+ Mask |= (1 << Pos);
+ // Tag along (firstcond[0] << 4) with the mask.
+ Mask |= (CC & 1) << 4;
+ MIB.addImm(Mask);
+
+ // Last instruction in IT block kills ITSTATE.
+ LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+
+ // Finalize the bundle.
+ finalizeBundle(MBB, InsertPos.getInstrIterator(),
+ ++LastITMI->getIterator());
+
+ Modified = true;
+ ++NumITs;
+ }
+
+ return Modified;
+}
+
+bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
+ const ARMSubtarget &STI =
+ static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ if (!STI.isThumb2())
+ return false;
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
+ TRI = STI.getRegisterInfo();
+ restrictIT = STI.restrictIT();
+
+ if (!AFI->isThumbFunction())
+ return false;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) {
+ MachineBasicBlock &MBB = *MFI;
+ ++MFI;
+ Modified |= InsertITInstructions(MBB);
+ }
+
+ if (Modified)
+ AFI->setHasITBlocks(true);
+
+ return Modified;
+}
+
+/// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks
+/// insertion pass.
+FunctionPass *llvm::createThumb2ITBlockPass() {
+ return new Thumb2ITBlockPass();
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
new file mode 100644
index 0000000..4da769f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -0,0 +1,636 @@
+//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb2InstrInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
+ cl::desc("Use old-style Thumb2 if-conversion heuristics"),
+ cl::init(false));
+
+Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI() {}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(ARM::tHINT);
+ NopInst.addOperand(MCOperand::createImm(0));
+ NopInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::createReg(0));
+}
+
+unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ // FIXME
+ return 0;
+}
+
+void
+Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ if (!AFI->hasITBlocks()) {
+ TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
+ return;
+ }
+
+ // If the first instruction of Tail is predicated, we may have to update
+ // the IT instruction.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getInstrPredicate(Tail, PredReg);
+ MachineBasicBlock::iterator MBBI = Tail;
+ if (CC != ARMCC::AL)
+ // Expecting at least the t2IT instruction before it.
+ --MBBI;
+
+ // Actually replace the tail.
+ TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
+
+ // Fix up IT.
+ if (CC != ARMCC::AL) {
+ MachineBasicBlock::iterator E = MBB->begin();
+ unsigned Count = 4; // At most 4 instructions in an IT block.
+ while (Count && MBBI != E) {
+ if (MBBI->isDebugValue()) {
+ --MBBI;
+ continue;
+ }
+ if (MBBI->getOpcode() == ARM::t2IT) {
+ unsigned Mask = MBBI->getOperand(1).getImm();
+ if (Count == 4)
+ MBBI->eraseFromParent();
+ else {
+ unsigned MaskOn = 1 << Count;
+ unsigned MaskOff = ~(MaskOn - 1);
+ MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn);
+ }
+ return;
+ }
+ --MBBI;
+ --Count;
+ }
+
+ // Ctrl flow can reach here if branch folding is run before IT block
+ // formation pass.
+ }
+}
+
+bool
+Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ while (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == MBB.end())
+ return false;
+ }
+
+ unsigned PredReg = 0;
+ return getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+}
+
+void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // Handle SPR, DPR, and QPR copies.
+ if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
+ return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
+
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
+}
+
+void Thumb2InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+
+ if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
+ RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
+ RC == &ARM::GPRnopcRegClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ return;
+ }
+
+ if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ // Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for
+ // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
+ // otherwise).
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ AddDefaultPred(MIB);
+ return;
+ }
+
+ ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
+}
+
+void Thumb2InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
+ RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
+ RC == &ARM::GPRnopcRegClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ return;
+ }
+
+ if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
+ // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
+ // otherwise).
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
+ AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ AddDefaultPred(MIB);
+
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ return;
+ }
+
+ ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
+}
+
+void
+Thumb2InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const {
+ if (RM == Reloc::PIC_)
+ expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12, RM);
+ else
+ expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12, RM);
+}
+
+void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ if (NumBytes == 0 && DestReg != BaseReg) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ return;
+ }
+
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ // If profitable, use a movw or movt to materialize the offset.
+ // FIXME: Use the scavenger to grab a scratch register.
+ if (DestReg != ARM::SP && DestReg != BaseReg &&
+ NumBytes >= 4096 &&
+ ARM_AM::getT2SOImmVal(NumBytes) == -1) {
+ bool Fits = false;
+ if (NumBytes < 65536) {
+ // Use a movw to materialize the 16-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
+ .addImm(NumBytes)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ Fits = true;
+ } else if ((NumBytes & 0xffff) == 0) {
+ // Use a movt to materialize the 32-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
+ .addReg(DestReg)
+ .addImm(NumBytes >> 16)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ Fits = true;
+ }
+
+ if (Fits) {
+ if (isSub) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
+ .addReg(BaseReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ } else {
+ // Here we know that DestReg is not SP but we do not
+ // know anything about BaseReg. t2ADDrr is an invalid
+ // instruction is SP is used as the second argument, but
+ // is fine if SP is the first argument. To be sure we
+ // do not generate invalid encoding, put BaseReg first.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
+ .addReg(BaseReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ }
+ return;
+ }
+ }
+
+ while (NumBytes) {
+ unsigned ThisVal = NumBytes;
+ unsigned Opc = 0;
+ if (DestReg == ARM::SP && BaseReg != ARM::SP) {
+ // mov sp, rn. Note t2MOVr cannot be used.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg)
+ .addReg(BaseReg).setMIFlags(MIFlags));
+ BaseReg = ARM::SP;
+ continue;
+ }
+
+ bool HasCCOut = true;
+ if (BaseReg == ARM::SP) {
+ // sub sp, sp, #imm7
+ if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
+ assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags));
+ NumBytes = 0;
+ continue;
+ }
+
+ // sub rd, sp, so_imm
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = countLeadingZeros(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ } else {
+ assert(DestReg != ARM::SP && BaseReg != ARM::SP);
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else if (ThisVal < 4096) {
+ Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ HasCCOut = false;
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = countLeadingZeros(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ }
+
+ // Build the new ADD / SUB.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)).setMIFlags(MIFlags);
+ if (HasCCOut)
+ AddDefaultCC(MIB);
+
+ BaseReg = DestReg;
+ }
+}
+
+static unsigned
+negativeOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi12: return ARM::t2LDRi8;
+ case ARM::t2LDRHi12: return ARM::t2LDRHi8;
+ case ARM::t2LDRBi12: return ARM::t2LDRBi8;
+ case ARM::t2LDRSHi12: return ARM::t2LDRSHi8;
+ case ARM::t2LDRSBi12: return ARM::t2LDRSBi8;
+ case ARM::t2STRi12: return ARM::t2STRi8;
+ case ARM::t2STRBi12: return ARM::t2STRBi8;
+ case ARM::t2STRHi12: return ARM::t2STRHi8;
+ case ARM::t2PLDi12: return ARM::t2PLDi8;
+
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ case ARM::t2PLDi8:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned
+positiveOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi8: return ARM::t2LDRi12;
+ case ARM::t2LDRHi8: return ARM::t2LDRHi12;
+ case ARM::t2LDRBi8: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHi8: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBi8: return ARM::t2LDRSBi12;
+ case ARM::t2STRi8: return ARM::t2STRi12;
+ case ARM::t2STRBi8: return ARM::t2STRBi12;
+ case ARM::t2STRHi8: return ARM::t2STRHi12;
+ case ARM::t2PLDi8: return ARM::t2PLDi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ case ARM::t2PLDi12:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned
+immediateOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRs: return ARM::t2LDRi12;
+ case ARM::t2LDRHs: return ARM::t2LDRHi12;
+ case ARM::t2LDRBs: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHs: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBs: return ARM::t2LDRSBi12;
+ case ARM::t2STRs: return ARM::t2STRi12;
+ case ARM::t2STRBs: return ARM::t2STRBi12;
+ case ARM::t2STRHs: return ARM::t2STRHi12;
+ case ARM::t2PLDs: return ARM::t2PLDi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ case ARM::t2PLDi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ case ARM::t2PLDi8:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrModeT2_i12.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
+
+ if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ unsigned PredReg;
+ if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // Remove offset and remaining explicit predicate operands.
+ do MI.RemoveOperand(FrameRegIdx+1);
+ while (MI.getNumOperands() > FrameRegIdx+1);
+ MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
+ AddDefaultPred(MIB);
+ return true;
+ }
+
+ bool HasCCOut = Opcode != ARM::t2ADDri12;
+
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::t2SUBri));
+ } else {
+ MI.setDesc(TII.get(ARM::t2ADDri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getT2SOImmVal(Offset) != -1) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ // Add cc_out operand if the original instruction did not have one.
+ if (!HasCCOut)
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+ Offset = 0;
+ return true;
+ }
+ // Another common case: imm12.
+ if (Offset < 4096 &&
+ (!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) {
+ unsigned NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ MI.setDesc(TII.get(NewOpc));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ // Remove the cc_out operand.
+ if (HasCCOut)
+ MI.RemoveOperand(MI.getNumOperands()-1);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, extract 8 adjacent bits from the immediate into this
+ // t2ADDri/t2SUBri.
+ unsigned RotAmt = countLeadingZeros<unsigned>(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ // Add cc_out operand if the original instruction did not have one.
+ if (!HasCCOut)
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+
+ } else {
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return false;
+
+ // AddrModeT2_so cannot handle any offset. If there is no offset
+ // register then we change to an immediate version.
+ unsigned NewOpc = Opcode;
+ if (AddrMode == ARMII::AddrModeT2_so) {
+ unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
+ if (OffsetReg != 0) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ return Offset == 0;
+ }
+
+ MI.RemoveOperand(FrameRegIdx+1);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0);
+ NewOpc = immediateOffsetOpcode(Opcode);
+ AddrMode = ARMII::AddrModeT2_i12;
+ }
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (AddrMode == ARMII::AddrModeT2_i8 || AddrMode == ARMII::AddrModeT2_i12) {
+ // i8 supports only negative, and i12 supports only positive, so
+ // based on Offset sign convert Opcode to the appropriate
+ // instruction
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset < 0) {
+ NewOpc = negativeOffsetOpcode(Opcode);
+ NumBits = 8;
+ isSub = true;
+ Offset = -Offset;
+ } else {
+ NewOpc = positiveOffsetOpcode(Opcode);
+ NumBits = 12;
+ }
+ } else if (AddrMode == ARMII::AddrMode5) {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1);
+ int InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ Offset += InstrOffs * 4;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+ } else if (AddrMode == ARMII::AddrModeT2_i8s4) {
+ Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
+ NumBits = 10; // 8 bits scaled by 4
+ // MCInst operand expects already scaled value.
+ Scale = 1;
+ assert((Offset & 3) == 0 && "Can't encode this offset!");
+ } else {
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ if (NewOpc != Opcode)
+ MI.setDesc(TII.get(NewOpc));
+
+ MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1);
+
+ // Attempt to fold address computation
+ // Common case: small offset, fits into instruction.
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with fp/sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else
+ ImmedOffset = -ImmedOffset;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, offset doesn't fit. Pull in what we can to simplify
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else {
+ ImmedOffset = -ImmedOffset;
+ if (ImmedOffset == 0)
+ // Change the opcode back if the encoded offset is zero.
+ MI.setDesc(TII.get(positiveOffsetOpcode(NewOpc)));
+ }
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
+
+ARMCC::CondCodes
+llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+ return ARMCC::AL;
+ return getInstrPredicate(MI, PredReg);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
new file mode 100644
index 0000000..916ab06
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -0,0 +1,78 @@
+//===-- Thumb2InstrInfo.h - Thumb-2 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H
+#define LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H
+
+#include "ARMBaseInstrInfo.h"
+#include "ThumbRegisterInfo.h"
+
+namespace llvm {
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
+
+class Thumb2InstrInfo : public ARMBaseInstrInfo {
+ ThumbRegisterInfo RI;
+public:
+ explicit Thumb2InstrInfo(const ARMSubtarget &STI);
+
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const override;
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is not such an opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const override;
+
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const override;
+
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const override;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
+
+private:
+ void expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const override;
+};
+
+/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
+/// to llvm::getInstrPredicate except it returns AL for conditional branch
+/// instructions which are "predicated", but are not in IT blocks.
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
new file mode 100644
index 0000000..bcd0e57
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -0,0 +1,1054 @@
+//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h" // To access Function attributes
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "t2-reduce-size"
+
+STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
+STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
+STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
+
+static cl::opt<int> ReduceLimit("t2-reduce-limit",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
+ cl::init(-1), cl::Hidden);
+
+namespace {
+ /// ReduceTable - A static table with information on mapping from wide
+ /// opcodes to narrow
+ struct ReduceEntry {
+ uint16_t WideOpc; // Wide opcode
+ uint16_t NarrowOpc1; // Narrow opcode to transform to
+ uint16_t NarrowOpc2; // Narrow opcode when it's two-address
+ uint8_t Imm1Limit; // Limit of immediate field (bits)
+ uint8_t Imm2Limit; // Limit of immediate field when it's two-address
+ unsigned LowRegs1 : 1; // Only possible if low-registers are used
+ unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
+ unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa.
+ // 1 - No cc field.
+ // 2 - Always set CPSR.
+ unsigned PredCC2 : 2;
+ unsigned PartFlag : 1; // 16-bit instruction does partial flag update
+ unsigned Special : 1; // Needs to be dealt with specially
+ unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
+ };
+
+ static const ReduceEntry ReduceTable[] = {
+ // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM
+ { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
+ { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 },
+ { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 },
+ { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+ { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 },
+ //FIXME: Disable CMN, as CCodes are backwards from compare expectations
+ //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 },
+ { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ // FIXME: adr.n immediate offset must be multiple of 4.
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 },
+ // FIXME: Do we need the 16-bit 'S' variant?
+ { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
+ { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+ { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
+ { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
+ { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
+ { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+
+ // FIXME: Clean this up after splitting each Thumb load / store opcode
+ // into multiple ones.
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+
+ { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
+ // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
+ // tSTMIA_UPD is a change in semantics which can only be used if the base
+ // register is killed. This difference is correctly handled elsewhere.
+ { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
+ };
+
+ class Thumb2SizeReduce : public MachineFunctionPass {
+ public:
+ static char ID;
+ Thumb2SizeReduce(std::function<bool(const Function &)> Ftor);
+
+ const Thumb2InstrInfo *TII;
+ const ARMSubtarget *STI;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "Thumb2 instruction size reduction pass";
+ }
+
+ private:
+ /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
+ DenseMap<unsigned, unsigned> ReduceOpcodeMap;
+
+ bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
+
+ bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead);
+
+ bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry);
+
+ bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
+
+ /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
+ /// instruction.
+ bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry, bool LiveCPSR,
+ bool IsSelfLoop);
+
+ /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
+ /// non-two-address instruction.
+ bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry, bool LiveCPSR,
+ bool IsSelfLoop);
+
+ /// ReduceMI - Attempt to reduce MI, return true on success.
+ bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+ bool LiveCPSR, bool IsSelfLoop);
+
+ /// ReduceMBB - Reduce width of instructions in the specified basic block.
+ bool ReduceMBB(MachineBasicBlock &MBB);
+
+ bool OptimizeSize;
+ bool MinimizeSize;
+
+ // Last instruction to define CPSR in the current block.
+ MachineInstr *CPSRDef;
+ // Was CPSR last defined by a high latency instruction?
+ // When CPSRDef is null, this refers to CPSR defs in predecessors.
+ bool HighLatencyCPSR;
+
+ struct MBBInfo {
+ // The flags leaving this block have high latency.
+ bool HighLatencyCPSR;
+ // Has this block been visited yet?
+ bool Visited;
+
+ MBBInfo() : HighLatencyCPSR(false), Visited(false) {}
+ };
+
+ SmallVector<MBBInfo, 8> BlockInfo;
+
+ std::function<bool(const Function &)> PredicateFtor;
+ };
+ char Thumb2SizeReduce::ID = 0;
+}
+
+Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
+ : MachineFunctionPass(ID), PredicateFtor(Ftor) {
+ OptimizeSize = MinimizeSize = false;
+ for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
+ unsigned FromOpc = ReduceTable[i].WideOpc;
+ if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
+ llvm_unreachable("Duplicated entries?");
+ }
+}
+
+static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
+ for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
+ if (*Regs == ARM::CPSR)
+ return true;
+ return false;
+}
+
+// Check for a likely high-latency flag def.
+static bool isHighLatencyCPSR(MachineInstr *Def) {
+ switch(Def->getOpcode()) {
+ case ARM::FMSTAT:
+ case ARM::tMUL:
+ return true;
+ }
+ return false;
+}
+
+/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
+/// the 's' 16-bit instruction partially update CPSR. Abort the
+/// transformation to avoid adding false dependency on last CPSR setting
+/// instruction which hurts the ability for out-of-order execution engine
+/// to do register renaming magic.
+/// This function checks if there is a read-of-write dependency between the
+/// last instruction that defines the CPSR and the current instruction. If there
+/// is, then there is no harm done since the instruction cannot be retired
+/// before the CPSR setting instruction anyway.
+/// Note, we are not doing full dependency analysis here for the sake of compile
+/// time. We're not looking for cases like:
+/// r0 = muls ...
+/// r1 = add.w r0, ...
+/// ...
+/// = mul.w r1
+/// In this case it would have been ok to narrow the mul.w to muls since there
+/// are indirect RAW dependency between the muls and the mul.w
+bool
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
+ // Disable the check for -Oz (aka OptimizeForSizeHarder).
+ if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
+ return false;
+
+ if (!CPSRDef)
+ // If this BB loops back to itself, conservatively avoid narrowing the
+ // first instruction that does partial flag update.
+ return HighLatencyCPSR || FirstInSelfLoop;
+
+ SmallSet<unsigned, 2> Defs;
+ for (const MachineOperand &MO : CPSRDef->operands()) {
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ Defs.insert(Reg);
+ }
+
+ for (const MachineOperand &MO : Use->operands()) {
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Defs.count(Reg))
+ return false;
+ }
+
+ // If the current CPSR has high latency, try to avoid the false dependency.
+ if (HighLatencyCPSR)
+ return true;
+
+ // tMOVi8 usually doesn't start long dependency chains, and there are a lot
+ // of them, so always shrink them when CPSR doesn't have high latency.
+ if (Use->getOpcode() == ARM::t2MOVi ||
+ Use->getOpcode() == ARM::t2MOVi16)
+ return false;
+
+ // No read-after-write dependency. The narrowing will add false dependency.
+ return true;
+}
+
+bool
+Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead) {
+ if ((is2Addr && Entry.PredCC2 == 0) ||
+ (!is2Addr && Entry.PredCC1 == 0)) {
+ if (Pred == ARMCC::AL) {
+ // Not predicated, must set CPSR.
+ if (!HasCC) {
+ // Original instruction was not setting CPSR, but CPSR is not
+ // currently live anyway. It's ok to set it. The CPSR def is
+ // dead though.
+ if (!LiveCPSR) {
+ HasCC = true;
+ CCDead = true;
+ return true;
+ }
+ return false;
+ }
+ } else {
+ // Predicated, must not set CPSR.
+ if (HasCC)
+ return false;
+ }
+ } else if ((is2Addr && Entry.PredCC2 == 2) ||
+ (!is2Addr && Entry.PredCC1 == 2)) {
+ /// Old opcode has an optional def of CPSR.
+ if (HasCC)
+ return true;
+ // If old opcode does not implicitly define CPSR, then it's not ok since
+ // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
+ if (!HasImplicitCPSRDef(MI->getDesc()))
+ return false;
+ HasCC = true;
+ } else {
+ // 16-bit instruction does not set CPSR.
+ if (HasCC)
+ return false;
+ }
+
+ return true;
+}
+
+static bool VerifyLowRegs(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
+ bool isLROk = (Opc == ARM::t2STMDB_UPD);
+ bool isSPOk = isPCOk || isLROk;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ if (isPCOk && Reg == ARM::PC)
+ continue;
+ if (isLROk && Reg == ARM::LR)
+ continue;
+ if (Reg == ARM::SP) {
+ if (isSPOk)
+ continue;
+ if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
+ // Special case for these ldr / str with sp as base register.
+ continue;
+ }
+ if (!isARMLowRegister(Reg))
+ return false;
+ }
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry) {
+ if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
+ return false;
+
+ unsigned Scale = 1;
+ bool HasImmOffset = false;
+ bool HasShift = false;
+ bool HasOffReg = true;
+ bool isLdStMul = false;
+ unsigned Opc = Entry.NarrowOpc1;
+ unsigned OpNum = 3; // First 'rest' of operands.
+ uint8_t ImmLimit = Entry.Imm1Limit;
+
+ switch (Entry.WideOpc) {
+ default:
+ llvm_unreachable("Unexpected Thumb2 load / store opcode!");
+ case ARM::t2LDRi12:
+ case ARM::t2STRi12:
+ if (MI->getOperand(1).getReg() == ARM::SP) {
+ Opc = Entry.NarrowOpc2;
+ ImmLimit = Entry.Imm2Limit;
+ }
+
+ Scale = 4;
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRBi12:
+ case ARM::t2STRBi12:
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRHi12:
+ case ARM::t2STRHi12:
+ Scale = 2;
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRSHs:
+ case ARM::t2STRs:
+ case ARM::t2STRBs:
+ case ARM::t2STRHs:
+ HasShift = true;
+ OpNum = 4;
+ break;
+ case ARM::t2LDMIA: {
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ assert(isARMLowRegister(BaseReg));
+
+ // For the non-writeback version (this one), the base register must be
+ // one of the registers being loaded.
+ bool isOK = false;
+ for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg) {
+ isOK = true;
+ break;
+ }
+ }
+
+ if (!isOK)
+ return false;
+
+ OpNum = 0;
+ isLdStMul = true;
+ break;
+ }
+ case ARM::t2STMIA: {
+ // If the base register is killed, we don't care what its value is after the
+ // instruction, so we can use an updating STMIA.
+ if (!MI->getOperand(0).isKill())
+ return false;
+
+ break;
+ }
+ case ARM::t2LDMIA_RET: {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg != ARM::SP)
+ return false;
+ Opc = Entry.NarrowOpc2; // tPOP_RET
+ OpNum = 2;
+ isLdStMul = true;
+ break;
+ }
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ OpNum = 0;
+
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == ARM::SP &&
+ (Entry.WideOpc == ARM::t2LDMIA_UPD ||
+ Entry.WideOpc == ARM::t2STMDB_UPD)) {
+ Opc = Entry.NarrowOpc2; // tPOP or tPUSH
+ OpNum = 2;
+ } else if (!isARMLowRegister(BaseReg) ||
+ (Entry.WideOpc != ARM::t2LDMIA_UPD &&
+ Entry.WideOpc != ARM::t2STMIA_UPD)) {
+ return false;
+ }
+
+ isLdStMul = true;
+ break;
+ }
+ }
+
+ unsigned OffsetReg = 0;
+ bool OffsetKill = false;
+ bool OffsetInternal = false;
+ if (HasShift) {
+ OffsetReg = MI->getOperand(2).getReg();
+ OffsetKill = MI->getOperand(2).isKill();
+ OffsetInternal = MI->getOperand(2).isInternalRead();
+
+ if (MI->getOperand(3).getImm())
+ // Thumb1 addressing mode doesn't support shift.
+ return false;
+ }
+
+ unsigned OffsetImm = 0;
+ if (HasImmOffset) {
+ OffsetImm = MI->getOperand(2).getImm();
+ unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
+
+ if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
+ // Make sure the immediate field fits.
+ return false;
+ }
+
+ // Add the 16-bit load / store instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
+
+ // tSTMIA_UPD takes a defining register operand. We've already checked that
+ // the register is killed, so mark it as dead here.
+ if (Entry.WideOpc == ARM::t2STMIA)
+ MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
+
+ if (!isLdStMul) {
+ MIB.addOperand(MI->getOperand(0));
+ MIB.addOperand(MI->getOperand(1));
+
+ if (HasImmOffset)
+ MIB.addImm(OffsetImm / Scale);
+
+ assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
+
+ if (HasOffReg)
+ MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
+ getInternalReadRegState(OffsetInternal));
+ }
+
+ // Transfer the rest of operands.
+ for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase_instr(MI);
+ ++NumLdSts;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, bool IsSelfLoop) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::t2ADDri) {
+ // If the source register is SP, try to reduce to tADDrSPi, otherwise
+ // it's a normal reduce.
+ if (MI->getOperand(1).getReg() != ARM::SP) {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ }
+ // Try to reduce to tADDrSPi.
+ unsigned Imm = MI->getOperand(2).getImm();
+ // The immediate must be in range, the destination register must be a low
+ // reg, the predicate must be "always" and the condition flags must not
+ // be being set.
+ if (Imm & 3 || Imm > 1020)
+ return false;
+ if (!isARMLowRegister(MI->getOperand(0).getReg()))
+ return false;
+ if (MI->getOperand(3).getImm() != ARMCC::AL)
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef() &&
+ MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(ARM::tADDrSPi))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
+ AddDefaultPred(MIB);
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB);
+
+ MBB.erase_instr(MI);
+ ++NumNarrows;
+ return true;
+ }
+
+ if (Entry.LowRegs1 && !VerifyLowRegs(MI))
+ return false;
+
+ if (MI->mayLoadOrStore())
+ return ReduceLoadStore(MBB, MI, Entry);
+
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri:
+ case ARM::t2ADDSrr: {
+ unsigned PredReg = 0;
+ if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri: {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+ return true;
+ // fallthrough
+ }
+ case ARM::t2ADDSrr:
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ }
+ }
+ break;
+ }
+ case ARM::t2RSBri:
+ case ARM::t2RSBSri:
+ case ARM::t2SXTB:
+ case ARM::t2SXTH:
+ case ARM::t2UXTB:
+ case ARM::t2UXTH:
+ if (MI->getOperand(2).getImm() == 0)
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ break;
+ case ARM::t2MOVi16:
+ // Can convert only 'pure' immediate operands, not immediates obtained as
+ // globals' addresses.
+ if (MI->getOperand(1).isImm())
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ break;
+ case ARM::t2CMPrr: {
+ // Try to reduce to the lo-reg only version first. Why there are two
+ // versions of the instruction is a mystery.
+ // It would be nice to just have two entries in the master table that
+ // are prioritized, but the table assumes a unique entry for each
+ // source insn opcode. So for now, we hack a local entry record to use.
+ static const ReduceEntry NarrowEntry =
+ { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ }
+ }
+ return false;
+}
+
+bool
+Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, bool IsSelfLoop) {
+
+ if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
+ return false;
+
+ if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
+ // Don't issue movs with shifter operand for some CPUs unless we
+ // are optimizing for size.
+ return false;
+
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ // t2MUL is "special". The tied source operand is second, not first.
+ if (MI->getOpcode() == ARM::t2MUL) {
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ // Early exit if the regs aren't all low regs.
+ if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
+ || !isARMLowRegister(Reg2))
+ return false;
+ if (Reg0 != Reg2) {
+ // If the other operand also isn't the same as the destination, we
+ // can't reduce.
+ if (Reg1 != Reg0)
+ return false;
+ // Try to commute the operands to make it a 2-address instruction.
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
+ } else if (Reg0 != Reg1) {
+ // Try to commute the operands to make it a 2-address instruction.
+ unsigned CommOpIdx1 = 1;
+ unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+ MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ return false;
+ MachineInstr *CommutedMI =
+ TII->commuteInstruction(MI, false, CommOpIdx1, CommOpIdx2);
+ if (!CommutedMI)
+ return false;
+ }
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
+ return false;
+ if (Entry.Imm2Limit) {
+ unsigned Imm = MI->getOperand(2).getImm();
+ unsigned Limit = (1 << Entry.Imm2Limit) - 1;
+ if (Imm > Limit)
+ return false;
+ } else {
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
+ return false;
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewMCID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewMCID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef()) {
+ unsigned NumOps = MCID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+ // Avoid adding a false dependency on partial flag update by some 16-bit
+ // instructions which has the 's' bit set.
+ if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
+ canAddPseudoFlagDep(MI, IsSelfLoop))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+ // Transfer the rest of operands.
+ unsigned NumOps = MCID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
+ continue;
+ if (SkipPred && MCID.OpInfo[i].isPredicate())
+ continue;
+ MIB.addOperand(MI->getOperand(i));
+ }
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase_instr(MI);
+ ++Num2Addrs;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, bool IsSelfLoop) {
+ if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
+ return false;
+
+ if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
+ // Don't issue movs with shifter operand for some CPUs unless we
+ // are optimizing for size.
+ return false;
+
+ unsigned Limit = ~0U;
+ if (Entry.Imm1Limit)
+ Limit = (1 << Entry.Imm1Limit) - 1;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate())
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::CPSR)
+ continue;
+ if (Entry.LowRegs1 && !isARMLowRegister(Reg))
+ return false;
+ } else if (MO.isImm() &&
+ !MCID.OpInfo[i].isPredicate()) {
+ if (((unsigned)MO.getImm()) > Limit)
+ return false;
+ }
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewMCID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewMCID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ if (MCID.hasOptionalDef()) {
+ unsigned NumOps = MCID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+ // Avoid adding a false dependency on partial flag update by some 16-bit
+ // instructions which has the 's' bit set.
+ if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
+ canAddPseudoFlagDep(MI, IsSelfLoop))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+ // Transfer the rest of operands.
+ unsigned NumOps = MCID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
+ continue;
+ if ((MCID.getOpcode() == ARM::t2RSBSri ||
+ MCID.getOpcode() == ARM::t2RSBri ||
+ MCID.getOpcode() == ARM::t2SXTB ||
+ MCID.getOpcode() == ARM::t2SXTH ||
+ MCID.getOpcode() == ARM::t2UXTB ||
+ MCID.getOpcode() == ARM::t2UXTH) && i == 2)
+ // Skip the zero immediate operand, it's now implicit.
+ continue;
+ bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
+ if (SkipPred && isPred)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
+ // Skip implicit def of CPSR. Either it's modeled as an optional
+ // def now or it's already an implicit def on the new instruction.
+ continue;
+ MIB.addOperand(MO);
+ }
+ if (!MCID.isPredicable() && NewMCID.isPredicable())
+ AddDefaultPred(MIB);
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase_instr(MI);
+ ++NumNarrows;
+ return true;
+}
+
+static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
+ bool HasDef = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+
+ DefCPSR = true;
+ if (!MO.isDead())
+ HasDef = true;
+ }
+
+ return HasDef || LiveCPSR;
+}
+
+static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+ assert(LiveCPSR && "CPSR liveness tracking is wrong!");
+ if (MO.isKill()) {
+ LiveCPSR = false;
+ break;
+ }
+ }
+
+ return LiveCPSR;
+}
+
+bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+ bool LiveCPSR, bool IsSelfLoop) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+ if (OPI == ReduceOpcodeMap.end())
+ return false;
+ const ReduceEntry &Entry = ReduceTable[OPI->second];
+
+ // Don't attempt normal reductions on "special" cases for now.
+ if (Entry.Special)
+ return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+
+ // Try to transform to a 16-bit two-address instruction.
+ if (Entry.NarrowOpc2 &&
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+ return true;
+
+ // Try to transform to a 16-bit non-two-address instruction.
+ if (Entry.NarrowOpc1 &&
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+ return true;
+
+ return false;
+}
+
+bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ // Yes, CPSR could be livein.
+ bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
+ MachineInstr *BundleMI = nullptr;
+
+ CPSRDef = nullptr;
+ HighLatencyCPSR = false;
+
+ // Check predecessors for the latest CPSRDef.
+ for (auto *Pred : MBB.predecessors()) {
+ const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
+ if (!PInfo.Visited) {
+ // Since blocks are visited in RPO, this must be a back-edge.
+ continue;
+ }
+ if (PInfo.HighLatencyCPSR) {
+ HighLatencyCPSR = true;
+ break;
+ }
+ }
+
+ // If this BB loops back to itself, conservatively avoid narrowing the
+ // first instruction that does partial flag update.
+ bool IsSelfLoop = MBB.isSuccessor(&MBB);
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator NextMII;
+ for (; MII != E; MII = NextMII) {
+ NextMII = std::next(MII);
+
+ MachineInstr *MI = &*MII;
+ if (MI->isBundle()) {
+ BundleMI = MI;
+ continue;
+ }
+ if (MI->isDebugValue())
+ continue;
+
+ LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
+
+ // Does NextMII belong to the same bundle as MI?
+ bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
+
+ if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
+ Modified = true;
+ MachineBasicBlock::instr_iterator I = std::prev(NextMII);
+ MI = &*I;
+ // Removing and reinserting the first instruction in a bundle will break
+ // up the bundle. Fix the bundling if it was broken.
+ if (NextInSameBundle && !NextMII->isBundledWithPred())
+ NextMII->bundleWithPred();
+ }
+
+ if (!NextInSameBundle && MI->isInsideBundle()) {
+ // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
+ // marker is only on the BUNDLE instruction. Process the BUNDLE
+ // instruction as we finish with the bundled instruction to work around
+ // the inconsistency.
+ if (BundleMI->killsRegister(ARM::CPSR))
+ LiveCPSR = false;
+ MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
+ if (MO && !MO->isDead())
+ LiveCPSR = true;
+ MO = BundleMI->findRegisterUseOperand(ARM::CPSR);
+ if (MO && !MO->isKill())
+ LiveCPSR = true;
+ }
+
+ bool DefCPSR = false;
+ LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
+ if (MI->isCall()) {
+ // Calls don't really set CPSR.
+ CPSRDef = nullptr;
+ HighLatencyCPSR = false;
+ IsSelfLoop = false;
+ } else if (DefCPSR) {
+ // This is the last CPSR defining instruction.
+ CPSRDef = MI;
+ HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
+ IsSelfLoop = false;
+ }
+ }
+
+ MBBInfo &Info = BlockInfo[MBB.getNumber()];
+ Info.HighLatencyCPSR = HighLatencyCPSR;
+ Info.Visited = true;
+ return Modified;
+}
+
+bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
+ if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
+ return false;
+
+ STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ if (STI->isThumb1Only() || STI->prefers32BitThumb())
+ return false;
+
+ TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
+
+ // Optimizing / minimizing size? Minimizing size implies optimizing for size.
+ OptimizeSize = MF.getFunction()->optForSize();
+ MinimizeSize = MF.getFunction()->optForMinSize();
+
+ BlockInfo.clear();
+ BlockInfo.resize(MF.getNumBlockIDs());
+
+ // Visit blocks in reverse post-order so LastCPSRDef is known for all
+ // predecessors.
+ ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
+ bool Modified = false;
+ for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
+ I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
+ Modified |= ReduceMBB(**I);
+ return Modified;
+}
+
+/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
+/// reduction pass.
+FunctionPass *llvm::createThumb2SizeReductionPass(
+ std::function<bool(const Function &)> Ftor) {
+ return new Thumb2SizeReduce(Ftor);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
new file mode 100644
index 0000000..b5f9d7e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -0,0 +1,623 @@
+//===-- ThumbRegisterInfo.cpp - Thumb-1 Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ThumbRegisterInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+extern cl::opt<bool> ReuseFrameIndexVals;
+}
+
+using namespace llvm;
+
+ThumbRegisterInfo::ThumbRegisterInfo() : ARMBaseRegisterInfo() {}
+
+const TargetRegisterClass *
+ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
+
+ if (ARM::tGPRRegClass.hasSubClassEq(RC))
+ return &ARM::tGPRRegClass;
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
+}
+
+const TargetRegisterClass *
+ThumbRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+ return ARMBaseRegisterInfo::getPointerRegClass(MF, Kind);
+ return &ARM::tGPRRegClass;
+}
+
+static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, unsigned DestReg,
+ unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) {
+ MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg)
+ .setMIFlags(MIFlags);
+}
+
+static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, unsigned DestReg,
+ unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+ .setMIFlags(MIFlags);
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ThumbRegisterInfo::emitLoadConstPool(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (STI.isThumb1Only()) {
+ assert((isARMLowRegister(DestReg) || isVirtualRegister(DestReg)) &&
+ "Thumb1 does not have ldr to high register");
+ return emitThumb1LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+ PredReg, MIFlags);
+ }
+ return emitThumb2LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+ PredReg, MIFlags);
+}
+
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
+/// in a register using mov / mvn sequences or load the immediate from a
+/// constpool entry.
+static
+void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, bool CanChangeCC,
+ const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags = MachineInstr::NoFlags) {
+ MachineFunction &MF = *MBB.getParent();
+ bool isHigh = !isARMLowRegister(DestReg) ||
+ (BaseReg != 0 && !isARMLowRegister(BaseReg));
+ bool isSub = false;
+ // Subtract doesn't have high register version. Load the negative value
+ // if either base or dest register is a high register. Also, if do not
+ // issue sub as part of the sequence if condition register is to be
+ // preserved.
+ if (NumBytes < 0 && !isHigh && CanChangeCC) {
+ isSub = true;
+ NumBytes = -NumBytes;
+ }
+ unsigned LdReg = DestReg;
+ if (DestReg == ARM::SP)
+ assert(BaseReg == ARM::SP && "Unexpected!");
+ if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg))
+ LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
+
+ if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes).setMIFlags(MIFlags);
+ } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) {
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes).setMIFlags(MIFlags);
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
+ .addReg(LdReg, RegState::Kill).setMIFlags(MIFlags);
+ } else
+ MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes,
+ ARMCC::AL, 0, MIFlags);
+
+ // Emit add / sub.
+ int Opc = (isSub) ? ARM::tSUBrr : ((isHigh || !CanChangeCC) ? ARM::tADDhirr
+ : ARM::tADDrr);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (Opc != ARM::tADDhirr)
+ MIB = AddDefaultT1CC(MIB);
+ if (DestReg == ARM::SP || isSub)
+ MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+ else
+ MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+ AddDefaultPred(MIB);
+}
+
+/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Tries a series of ADDs or
+/// SUBs first, and uses a constant pool value if the instruction sequence would
+/// be too long. This is allowed to modify the condition flags.
+void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ unsigned Bytes = (unsigned)NumBytes;
+ if (isSub) Bytes = -NumBytes;
+
+ int CopyOpc = 0;
+ unsigned CopyBits = 0;
+ unsigned CopyScale = 1;
+ bool CopyNeedsCC = false;
+ int ExtraOpc = 0;
+ unsigned ExtraBits = 0;
+ unsigned ExtraScale = 1;
+ bool ExtraNeedsCC = false;
+
+ // Strategy:
+ // We need to select two types of instruction, maximizing the available
+ // immediate range of each. The instructions we use will depend on whether
+ // DestReg and BaseReg are low, high or the stack pointer.
+ // * CopyOpc - DestReg = BaseReg + imm
+ // This will be emitted once if DestReg != BaseReg, and never if
+ // DestReg == BaseReg.
+ // * ExtraOpc - DestReg = DestReg + imm
+ // This will be emitted as many times as necessary to add the
+ // full immediate.
+ // If the immediate ranges of these instructions are not large enough to cover
+ // NumBytes with a reasonable number of instructions, we fall back to using a
+ // value loaded from a constant pool.
+ if (DestReg == ARM::SP) {
+ if (BaseReg == ARM::SP) {
+ // sp -> sp
+ // Already in right reg, no copy needed
+ } else {
+ // low -> sp or high -> sp
+ CopyOpc = ARM::tMOVr;
+ CopyBits = 0;
+ }
+ ExtraOpc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ ExtraBits = 7;
+ ExtraScale = 4;
+ } else if (isARMLowRegister(DestReg)) {
+ if (BaseReg == ARM::SP) {
+ // sp -> low
+ assert(!isSub && "Thumb1 does not have tSUBrSPi");
+ CopyOpc = ARM::tADDrSPi;
+ CopyBits = 8;
+ CopyScale = 4;
+ } else if (DestReg == BaseReg) {
+ // low -> same low
+ // Already in right reg, no copy needed
+ } else if (isARMLowRegister(BaseReg)) {
+ // low -> different low
+ CopyOpc = isSub ? ARM::tSUBi3 : ARM::tADDi3;
+ CopyBits = 3;
+ CopyNeedsCC = true;
+ } else {
+ // high -> low
+ CopyOpc = ARM::tMOVr;
+ CopyBits = 0;
+ }
+ ExtraOpc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ ExtraBits = 8;
+ ExtraNeedsCC = true;
+ } else /* DestReg is high */ {
+ if (DestReg == BaseReg) {
+ // high -> same high
+ // Already in right reg, no copy needed
+ } else {
+ // {low,high,sp} -> high
+ CopyOpc = ARM::tMOVr;
+ CopyBits = 0;
+ }
+ ExtraOpc = 0;
+ }
+
+ // We could handle an unaligned immediate with an unaligned copy instruction
+ // and an aligned extra instruction, but this case is not currently needed.
+ assert(((Bytes & 3) == 0 || ExtraScale == 1) &&
+ "Unaligned offset, but all instructions require alignment");
+
+ unsigned CopyRange = ((1 << CopyBits) - 1) * CopyScale;
+ // If we would emit the copy with an immediate of 0, just use tMOVr.
+ if (CopyOpc && Bytes < CopyScale) {
+ CopyOpc = ARM::tMOVr;
+ CopyScale = 1;
+ CopyNeedsCC = false;
+ CopyRange = 0;
+ }
+ unsigned ExtraRange = ((1 << ExtraBits) - 1) * ExtraScale; // per instruction
+ unsigned RequiredCopyInstrs = CopyOpc ? 1 : 0;
+ unsigned RangeAfterCopy = (CopyRange > Bytes) ? 0 : (Bytes - CopyRange);
+
+ // We could handle this case when the copy instruction does not require an
+ // aligned immediate, but we do not currently do this.
+ assert(RangeAfterCopy % ExtraScale == 0 &&
+ "Extra instruction requires immediate to be aligned");
+
+ unsigned RequiredExtraInstrs;
+ if (ExtraRange)
+ RequiredExtraInstrs = RoundUpToAlignment(RangeAfterCopy, ExtraRange) / ExtraRange;
+ else if (RangeAfterCopy > 0)
+ // We need an extra instruction but none is available
+ RequiredExtraInstrs = 1000000;
+ else
+ RequiredExtraInstrs = 0;
+ unsigned RequiredInstrs = RequiredCopyInstrs + RequiredExtraInstrs;
+ unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
+
+ // Use a constant pool, if the sequence of ADDs/SUBs is too expensive.
+ if (RequiredInstrs > Threshold) {
+ emitThumbRegPlusImmInReg(MBB, MBBI, dl,
+ DestReg, BaseReg, NumBytes, true,
+ TII, MRI, MIFlags);
+ return;
+ }
+
+ // Emit zero or one copy instructions
+ if (CopyOpc) {
+ unsigned CopyImm = std::min(Bytes, CopyRange) / CopyScale;
+ Bytes -= CopyImm * CopyScale;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg);
+ if (CopyNeedsCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(BaseReg, RegState::Kill);
+ if (CopyOpc != ARM::tMOVr) {
+ MIB.addImm(CopyImm);
+ }
+ AddDefaultPred(MIB.setMIFlags(MIFlags));
+
+ BaseReg = DestReg;
+ }
+
+ // Emit zero or more in-place add/sub instructions
+ while (Bytes) {
+ unsigned ExtraImm = std::min(Bytes, ExtraRange) / ExtraScale;
+ Bytes -= ExtraImm * ExtraScale;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg);
+ if (ExtraNeedsCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(BaseReg).addImm(ExtraImm);
+ MIB = AddDefaultPred(MIB);
+ MIB.setMIFlags(MIFlags);
+ }
+}
+
+static void removeOperands(MachineInstr &MI, unsigned i) {
+ unsigned Op = i;
+ for (unsigned e = MI.getNumOperands(); i != e; ++i)
+ MI.RemoveOperand(Op);
+}
+
+/// convertToNonSPOpcode - Change the opcode to the non-SP version, because
+/// we're replacing the frame index with a non-SP register.
+static unsigned convertToNonSPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::tLDRspi:
+ return ARM::tLDRi;
+
+ case ARM::tSTRspi:
+ return ARM::tSTRi;
+ }
+
+ return Opcode;
+}
+
+bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
+ unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ assert(MBB.getParent()->getSubtarget<ARMSubtarget>().isThumb1Only() &&
+ "This isn't needed for thumb2!");
+ DebugLoc dl = MI.getDebugLoc();
+ MachineInstrBuilder MIB(*MBB.getParent(), &MI);
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+
+ if (Opcode == ARM::tADDframe) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ unsigned DestReg = MI.getOperand(0).getReg();
+
+ emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
+ *this);
+ MBB.erase(II);
+ return true;
+ } else {
+ if (AddrMode != ARMII::AddrModeT1_s)
+ llvm_unreachable("Unsupported addressing mode!");
+
+ unsigned ImmIdx = FrameRegIdx + 1;
+ int InstrOffs = MI.getOperand(ImmIdx).getImm();
+ unsigned NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+ unsigned Scale = 4;
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale - 1)) == 0 && "Can't encode this offset!");
+
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with the frame register (e.g., sp).
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ ImmOp.ChangeToImmediate(ImmedOffset);
+
+ // If we're using a register where sp was stored, convert the instruction
+ // to the non-SP version.
+ unsigned NewOpc = convertToNonSPOpcode(Opcode);
+ if (NewOpc != Opcode && FrameReg != ARM::SP)
+ MI.setDesc(TII.get(NewOpc));
+
+ return true;
+ }
+
+ NumBits = 5;
+ Mask = (1 << NumBits) - 1;
+
+ // If this is a thumb spill / restore, we will be using a constpool load to
+ // materialize the offset.
+ if (Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
+ ImmOp.ChangeToImmediate(0);
+ } else {
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask * Scale);
+ }
+ }
+
+ return Offset == 0;
+}
+
+void ThumbRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::resolveFrameIndex(MI, BaseReg, Offset);
+
+ const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+ assert (Done && "Unable to resolve frame index!");
+ (void)Done;
+}
+
+/// saveScavengerRegister - Spill the register so it can be used by the
+/// register scavenger. Return true.
+bool ThumbRegisterInfo::saveScavengerRegister(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC,
+ unsigned Reg) const {
+
+ const ARMSubtarget &STI = MBB.getParent()->getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::saveScavengerRegister(MBB, I, UseMI, RC, Reg);
+
+ // Thumb1 can't use the emergency spill slot on the stack because
+ // ldr/str immediate offsets must be positive, and if we're referencing
+ // off the frame pointer (if, for example, there are alloca() calls in
+ // the function, the offset will be negative. Use R12 instead since that's
+ // a call clobbered register that we know won't be used in Thumb1 mode.
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ DebugLoc DL;
+ AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(Reg, RegState::Kill));
+
+ // The UseMI is where we would like to restore the register. If there's
+ // interference with R12 before then, however, we'll need to restore it
+ // before that instead and adjust the UseMI.
+ bool done = false;
+ for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+ if (II->isDebugValue())
+ continue;
+ // If this instruction affects R12, adjust our restore point.
+ for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = II->getOperand(i);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::R12)) {
+ UseMI = II;
+ done = true;
+ break;
+ }
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ if (MO.getReg() == ARM::R12) {
+ UseMI = II;
+ done = true;
+ break;
+ }
+ }
+ }
+ // Restore the register from R12
+ AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill));
+
+ return true;
+}
+
+void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::eliminateFrameIndex(II, SPAdj, FIOperandNum,
+ RS);
+
+ unsigned VReg = 0;
+ const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc dl = MI.getDebugLoc();
+ MachineInstrBuilder MIB(*MBB.getParent(), &MI);
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ assert(SPAdj == 0 && STI.getFrameLowering()->hasFP(MF) && "Unexpected");
+ // There are alloca()'s in this function, must reference off the frame
+ // pointer or base pointer instead.
+ if (!hasBasePointer(MF)) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else
+ FrameReg = BasePtr;
+ }
+
+ // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the
+ // call frame setup/destroy instructions have already been eliminated. That
+ // means the stack pointer cannot be used to access the emergency spill slot
+ // when !hasReservedCallFrame().
+#ifndef NDEBUG
+ if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
+ assert(STI.getFrameLowering()->hasReservedCallFrame(MF) &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions without a reserved call frame");
+ assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions with variable sized frame objects");
+ }
+#endif // NDEBUG
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ assert(AFI->isThumbFunction() &&
+ "This eliminateFrameIndex only supports Thumb1!");
+ if (rewriteFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
+ return;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert(Offset && "This code isn't needed if offset already handled!");
+
+ unsigned Opcode = MI.getOpcode();
+
+ // Remove predicate first.
+ int PIdx = MI.findFirstPredOperandIdx();
+ if (PIdx != -1)
+ removeOperands(MI, PIdx);
+
+ if (MI.mayLoad()) {
+ // Use the destination register to materialize sp + offset.
+ unsigned TmpReg = MI.getOperand(0).getReg();
+ bool UseRR = false;
+ if (Opcode == ARM::tLDRspi) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg,
+ Offset, false, TII, *this);
+ else {
+ emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
+ UseRR = true;
+ }
+ } else {
+ emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII,
+ *this);
+ }
+
+ MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
+ MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
+ false);
+ } else if (MI.mayStore()) {
+ VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
+ bool UseRR = false;
+
+ if (Opcode == ARM::tSTRspi) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg,
+ Offset, false, TII, *this);
+ else {
+ emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
+ *this);
+ MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
+ MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
+ false);
+ } else {
+ llvm_unreachable("Unexpected opcode!");
+ }
+
+ // Add predicate back if it's needed.
+ if (MI.isPredicable())
+ AddDefaultPred(MIB);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
new file mode 100644
index 0000000..23aaff3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
@@ -0,0 +1,65 @@
+//===- ThumbRegisterInfo.h - Thumb Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb implementation of the TargetRegisterInfo
+// class. With the exception of emitLoadConstPool Thumb2 tracks
+// ARMBaseRegisterInfo, Thumb1 overloads the functions below.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H
+#define LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H
+
+#include "ARMBaseRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+
+struct ThumbRegisterInfo : public ARMBaseRegisterInfo {
+public:
+ ThumbRegisterInfo();
+
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
+
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind = 0) const override;
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void
+ emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags) const override;
+
+ // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
+ // however much remains to be handled. Return 'true' if no further
+ // work is required.
+ bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const;
+ void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const override;
+ bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const override;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = nullptr) const override;
+};
+}
+
+#endif
OpenPOWER on IntegriCloud