summary refs log tree commit diff stats
path: root/lib/Target/AArch64
diff options
context:
space:
mode:
author dim <dim@FreeBSD.org> 2015-05-27 18:44:32 +0000
committer dim <dim@FreeBSD.org> 2015-05-27 18:44:32 +0000
commit 782067d0278612ee75d024b9b135c221c327e9e8 (patch)
tree a6140557876943cdd800ee997c9317283394b22c /lib/Target/AArch64
parent 6669eceb008a9f13853b330dc0b099d6386fe879 (diff)
download FreeBSD-src-782067d0278612ee75d024b9b135c221c327e9e8.zip
FreeBSD-src-782067d0278612ee75d024b9b135c221c327e9e8.tar.gz
Vendor import of llvm trunk r238337:
https://llvm.org/svn/llvm-project/llvm/trunk@238337
Diffstat (limited to 'lib/Target/AArch64')
-rw-r--r-- lib/Target/AArch64/AArch64.h 3
-rw-r--r-- lib/Target/AArch64/AArch64.td 11
-rw-r--r-- lib/Target/AArch64/AArch64A53Fix835769.cpp 16
-rw-r--r-- lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp 56
-rw-r--r-- lib/Target/AArch64/AArch64AddressTypePromotion.cpp 1
-rw-r--r-- lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp 16
-rw-r--r-- lib/Target/AArch64/AArch64AsmPrinter.cpp 118
-rw-r--r-- lib/Target/AArch64/AArch64BranchRelaxation.cpp 4
-rw-r--r-- lib/Target/AArch64/AArch64CallingConvention.h 2
-rw-r--r-- lib/Target/AArch64/AArch64CallingConvention.td 2
-rw-r--r-- lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp 8
-rw-r--r-- lib/Target/AArch64/AArch64CollectLOH.cpp 26
-rw-r--r-- lib/Target/AArch64/AArch64ConditionOptimizer.cpp 16
-rw-r--r-- lib/Target/AArch64/AArch64ConditionalCompares.cpp 8
-rw-r--r-- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp 13
-rw-r--r-- lib/Target/AArch64/AArch64FastISel.cpp 110
-rw-r--r-- lib/Target/AArch64/AArch64FrameLowering.cpp 260
-rw-r--r-- lib/Target/AArch64/AArch64FrameLowering.h 4
-rw-r--r-- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp 311
-rw-r--r-- lib/Target/AArch64/AArch64ISelLowering.cpp 1140
-rw-r--r-- lib/Target/AArch64/AArch64ISelLowering.h 47
-rw-r--r-- lib/Target/AArch64/AArch64InstrFormats.td 228
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.cpp 29
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.h 7
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.td 415
-rw-r--r-- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp 224
-rw-r--r-- lib/Target/AArch64/AArch64MCInstLower.cpp 10
-rw-r--r-- lib/Target/AArch64/AArch64PBQPRegAlloc.cpp 8
-rw-r--r-- lib/Target/AArch64/AArch64PromoteConstant.cpp 157
-rw-r--r-- lib/Target/AArch64/AArch64RegisterInfo.cpp 77
-rw-r--r-- lib/Target/AArch64/AArch64RegisterInfo.h 22
-rw-r--r-- lib/Target/AArch64/AArch64SchedA57.td 20
-rw-r--r-- lib/Target/AArch64/AArch64SelectionDAGInfo.cpp 9
-rw-r--r-- lib/Target/AArch64/AArch64StorePairSuppress.cpp 19
-rw-r--r-- lib/Target/AArch64/AArch64Subtarget.cpp 16
-rw-r--r-- lib/Target/AArch64/AArch64Subtarget.h 11
-rw-r--r-- lib/Target/AArch64/AArch64TargetMachine.cpp 62
-rw-r--r-- lib/Target/AArch64/AArch64TargetMachine.h 9
-rw-r--r-- lib/Target/AArch64/AArch64TargetObjectFile.cpp 23
-rw-r--r-- lib/Target/AArch64/AArch64TargetObjectFile.h 7
-rw-r--r-- lib/Target/AArch64/AArch64TargetTransformInfo.cpp 327
-rw-r--r-- lib/Target/AArch64/AArch64TargetTransformInfo.h 147
-rw-r--r-- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp 402
-rw-r--r-- lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp 123
-rw-r--r-- lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp 6
-rw-r--r-- lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp 112
-rw-r--r-- lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h 131
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h 6
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp 66
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp 8
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp 54
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h 4
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp 6
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h 3
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp 31
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp 3
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h 4
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp 140
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h 29
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp 161
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp 9
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h 42
-rw-r--r-- lib/Target/AArch64/Utils/AArch64BaseInfo.cpp 1510
-rw-r--r-- lib/Target/AArch64/Utils/AArch64BaseInfo.h 125
64 files changed, 4152 insertions, 2822 deletions
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index e96d18b..21106c9 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -40,9 +40,6 @@ FunctionPass *createAArch64ConditionOptimizerPass();
FunctionPass *createAArch64AddressTypePromotionPass();
FunctionPass *createAArch64A57FPLoadBalancing();
FunctionPass *createAArch64A53Fix835769();
-/// \brief Creates an ARM-specific Target Transformation Info pass.
-ImmutablePass *
-createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM);
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index e6a27c3..9a7d6c8 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -41,6 +41,13 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;
//===----------------------------------------------------------------------===//
+// Architectures.
+//
+
+def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
+ "Support ARM v8.1a instructions", [FeatureCRC]>;
+
+//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -91,6 +98,8 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
+// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
+def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
//===----------------------------------------------------------------------===//
@@ -114,12 +123,14 @@ def AppleAsmParserVariant : AsmParserVariant {
// AsmWriter bits get associated with the correct class.
def GenericAsmWriter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
int Variant = 0;
bit isMCAsmWriter = 1;
}
def AppleAsmWriter : AsmWriter {
let AsmWriterClassName = "AppleInstPrinter";
+ int PassSubtarget = 1;
int Variant = 1;
int isMCAsmWriter = 1;
}
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
index 852a635..d7ef3f4 100644
--- a/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -16,8 +16,6 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -26,6 +24,8 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -48,7 +48,7 @@ static bool isFirstInstructionInSequence(MachineInstr *MI) {
case AArch64::PRFUMi:
return true;
default:
- return (MI->mayLoad() || MI->mayStore());
+ return MI->mayLoadOrStore();
}
}
@@ -79,7 +79,7 @@ static bool isSecondInstructionInSequence(MachineInstr *MI) {
namespace {
class AArch64A53Fix835769 : public MachineFunctionPass {
- const AArch64InstrInfo *TII;
+ const TargetInstrInfo *TII;
public:
static char ID;
@@ -107,17 +107,13 @@ char AArch64A53Fix835769::ID = 0;
bool
AArch64A53Fix835769::runOnMachineFunction(MachineFunction &F) {
- const TargetMachine &TM = F.getTarget();
-
- bool Changed = false;
DEBUG(dbgs() << "***** AArch64A53Fix835769 *****\n");
-
- TII = TM.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ bool Changed = false;
+ TII = F.getSubtarget().getInstrInfo();
for (auto &MBB : F) {
Changed |= runOnBasicBlock(MBB);
}
-
return Changed;
}
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index dd1a1ea..bffd9e6 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -96,6 +96,10 @@ static bool isMla(MachineInstr *MI) {
}
}
+namespace llvm {
+static void initializeAArch64A57FPLoadBalancingPass(PassRegistry &);
+}
+
//===----------------------------------------------------------------------===//
namespace {
@@ -109,14 +113,15 @@ static const char *ColorNames[2] = { "Even", "Odd" };
class Chain;
class AArch64A57FPLoadBalancing : public MachineFunctionPass {
- const AArch64InstrInfo *TII;
MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
RegisterClassInfo RCI;
public:
static char ID;
- explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {}
+ explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {
+ initializeAArch64A57FPLoadBalancingPass(*PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &F) override;
@@ -137,14 +142,22 @@ private:
int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
void scanInstruction(MachineInstr *MI, unsigned Idx,
std::map<unsigned, Chain*> &Active,
- std::set<std::unique_ptr<Chain>> &AllChains);
+ std::vector<std::unique_ptr<Chain>> &AllChains);
void maybeKillChain(MachineOperand &MO, unsigned Idx,
std::map<unsigned, Chain*> &RegChains);
Color getColor(unsigned Register);
Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
};
+}
+
char AArch64A57FPLoadBalancing::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancing, DEBUG_TYPE,
+ "AArch64 A57 FP Load-Balancing", false, false)
+INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
+ "AArch64 A57 FP Load-Balancing", false, false)
+
+namespace {
/// A Chain is a sequence of instructions that are linked together by
/// an accumulation operand. For example:
///
@@ -259,7 +272,7 @@ public:
}
/// Return true if this chain starts before Other.
- bool startsBefore(Chain *Other) {
+ bool startsBefore(const Chain *Other) const {
return StartInstIdx < Other->StartInstIdx;
}
@@ -274,12 +287,12 @@ public:
raw_string_ostream OS(S);
OS << "{";
- StartInst->print(OS, NULL, true);
+ StartInst->print(OS, /* SkipOpers= */true);
OS << " -> ";
- LastInst->print(OS, NULL, true);
+ LastInst->print(OS, /* SkipOpers= */true);
if (KillInst) {
OS << " (kill @ ";
- KillInst->print(OS, NULL, true);
+ KillInst->print(OS, /* SkipOpers= */true);
OS << ")";
}
OS << "}";
@@ -294,13 +307,16 @@ public:
//===----------------------------------------------------------------------===//
bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
+ // Don't do anything if this isn't an A53 or A57.
+ if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||
+ F.getSubtarget<AArch64Subtarget>().isCortexA57()))
+ return false;
+
bool Changed = false;
DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
- const TargetMachine &TM = F.getTarget();
MRI = &F.getRegInfo();
TRI = F.getRegInfo().getTargetRegisterInfo();
- TII = TM.getSubtarget<AArch64Subtarget>().getInstrInfo();
RCI.runOnMachineFunction(F);
for (auto &MBB : F) {
@@ -320,7 +336,7 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
// been killed yet. This is keyed by register - all chains can only have one
// "link" register between each inst in the chain.
std::map<unsigned, Chain*> ActiveChains;
- std::set<std::unique_ptr<Chain>> AllChains;
+ std::vector<std::unique_ptr<Chain>> AllChains;
unsigned Idx = 0;
for (auto &MI : MBB)
scanInstruction(&MI, Idx++, ActiveChains, AllChains);
@@ -431,10 +447,17 @@ bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
// chains that we cannot change before we look at those we can,
// so the parity counter is updated and we know what color we should
// change them to!
+ // Final tie-break with instruction order so pass output is stable (i.e. not
+ // dependent on malloc'd pointer values).
std::sort(GV.begin(), GV.end(), [](const Chain *G1, const Chain *G2) {
if (G1->size() != G2->size())
return G1->size() > G2->size();
- return G1->requiresFixup() > G2->requiresFixup();
+ if (G1->requiresFixup() != G2->requiresFixup())
+ return G1->requiresFixup() > G2->requiresFixup();
+ // Make sure startsBefore() produces a stable final order.
+ assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
+ "Starts before not total order!");
+ return G1->startsBefore(G2);
});
Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
@@ -580,10 +603,9 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
return Changed;
}
-void AArch64A57FPLoadBalancing::
-scanInstruction(MachineInstr *MI, unsigned Idx,
- std::map<unsigned, Chain*> &ActiveChains,
- std::set<std::unique_ptr<Chain>> &AllChains) {
+void AArch64A57FPLoadBalancing::scanInstruction(
+ MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
+ std::vector<std::unique_ptr<Chain>> &AllChains) {
// Inspect "MI", updating ActiveChains and AllChains.
if (isMul(MI)) {
@@ -602,7 +624,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
- AllChains.insert(std::move(G));
+ AllChains.push_back(std::move(G));
} else if (isMla(MI)) {
@@ -646,7 +668,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
<< TRI->getName(DestReg) << "\n");
auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
- AllChains.insert(std::move(G));
+ AllChains.push_back(std::move(G));
} else {
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 287989f..716e1a3 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -41,6 +41,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 5afe0f4..18d21fd 100644
--- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -64,7 +64,7 @@ STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
namespace {
class AArch64AdvSIMDScalar : public MachineFunctionPass {
MachineRegisterInfo *MRI;
- const AArch64InstrInfo *TII;
+ const TargetInstrInfo *TII;
private:
// isProfitableToTransform - Predicate function to determine whether an
@@ -158,7 +158,7 @@ static unsigned getSrcFromCopy(const MachineInstr *MI,
// getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent
// that we're considering transforming to, return that AdvSIMD opcode. For all
// others, return the original opcode.
-static int getTransformOpcode(unsigned Opc) {
+static unsigned getTransformOpcode(unsigned Opc) {
switch (Opc) {
default:
break;
@@ -179,7 +179,7 @@ static int getTransformOpcode(unsigned Opc) {
}
static bool isTransformable(const MachineInstr *MI) {
- int Opc = MI->getOpcode();
+ unsigned Opc = MI->getOpcode();
return Opc != getTransformOpcode(Opc);
}
@@ -268,7 +268,7 @@ AArch64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
return TransformAll;
}
-static MachineInstr *insertCopy(const AArch64InstrInfo *TII, MachineInstr *MI,
+static MachineInstr *insertCopy(const TargetInstrInfo *TII, MachineInstr *MI,
unsigned Dst, unsigned Src, bool IsKill) {
MachineInstrBuilder MIB =
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AArch64::COPY),
@@ -286,8 +286,8 @@ void AArch64AdvSIMDScalar::transformInstruction(MachineInstr *MI) {
DEBUG(dbgs() << "Scalar transform: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
- int OldOpc = MI->getOpcode();
- int NewOpc = getTransformOpcode(OldOpc);
+ unsigned OldOpc = MI->getOpcode();
+ unsigned NewOpc = getTransformOpcode(OldOpc);
assert(OldOpc != NewOpc && "transform an instruction to itself?!");
// Check if we need a copy for the source registers.
@@ -376,10 +376,8 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
bool Changed = false;
DEBUG(dbgs() << "***** AArch64AdvSIMDScalar *****\n");
- const TargetMachine &TM = mf.getTarget();
MRI = &mf.getRegInfo();
- TII = static_cast<const AArch64InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ TII = mf.getSubtarget().getInstrInfo();
// Just check things on a one-block-at-a-time basis.
for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 5159dbf..a0a09e4 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -13,13 +13,13 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AArch64AddressingModes.h"
-#include "MCTargetDesc/AArch64MCExpr.h"
#include "AArch64.h"
#include "AArch64MCInstLower.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "InstPrinter/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -36,8 +36,10 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -45,19 +47,13 @@ using namespace llvm;
namespace {
class AArch64AsmPrinter : public AsmPrinter {
- /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
- /// make the right decision when printing asm code for different targets.
- const AArch64Subtarget *Subtarget;
-
AArch64MCInstLower MCInstLowering;
StackMaps SM;
public:
- AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer),
- Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
- MCInstLowering(OutContext, *this), SM(*this), AArch64FI(nullptr),
- LOHLabelCounter(0) {}
+ AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
+ SM(*this), AArch64FI(nullptr) {}
const char *getPassName() const override {
return "AArch64 Assembly Printer";
@@ -118,7 +114,6 @@ private:
typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
MInstToMCSymbol LOHInstToLabel;
- unsigned LOHLabelCounter;
};
} // end of anonymous namespace
@@ -126,38 +121,16 @@ private:
//===----------------------------------------------------------------------===//
void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetMachO()) {
+ Triple TT(TM.getTargetTriple());
+ if (TT.isOSBinFormatMachO()) {
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
// implementation of multiple entry points). If this doesn't occur, the
// linker can safely perform dead code stripping. Since LLVM never
// generates code that does this, it is always safe to set.
- OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
SM.serializeToStackMapSection();
}
-
- // Emit a .data.rel section containing any stubs that were created.
- if (Subtarget->isTargetELF()) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(0));
- }
- Stubs.clear();
- }
- }
-
}
MachineLocation
@@ -183,7 +156,7 @@ void AArch64AsmPrinter::EmitLOHs() {
"Label hasn't been inserted for LOH related instruction");
MCArgs.push_back(LabelIt->second);
}
- OutStreamer.EmitLOHDirective(D.getKind(), MCArgs);
+ OutStreamer->EmitLOHDirective(D.getKind(), MCArgs);
MCArgs.clear();
}
}
@@ -199,11 +172,11 @@ MCSymbol *AArch64AsmPrinter::GetCPISymbol(unsigned CPID) const {
// avoid addends on the relocation?), ELF has no such concept and
// uses a normal private symbol.
if (getDataLayout().getLinkerPrivateGlobalPrefix()[0])
- return OutContext.GetOrCreateSymbol(
+ return OutContext.getOrCreateSymbol(
Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" +
Twine(getFunctionNumber()) + "_" + Twine(CPID));
- return OutContext.GetOrCreateSymbol(
+ return OutContext.getOrCreateSymbol(
Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" +
Twine(getFunctionNumber()) + "_" + Twine(CPID));
}
@@ -226,6 +199,17 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
O << '#' << Imm;
break;
}
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *Sym = getSymbol(GV);
+
+ // FIXME: Can we get anything other than a plain symbol here?
+ assert(!MO.getTargetFlags() && "Unknown operand target flag!");
+
+ O << *Sym;
+ printOffset(MO.getOffset(), O);
+ break;
+ }
}
}
@@ -254,8 +238,8 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
const TargetRegisterClass *RC,
bool isVector, raw_ostream &O) {
assert(MO.isReg() && "Should only get here with a register!");
- const AArch64RegisterInfo *RI = static_cast<const AArch64RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const AArch64RegisterInfo *RI =
+ MF->getSubtarget<AArch64Subtarget>().getRegisterInfo();
unsigned Reg = MO.getReg();
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
assert(RI->regsOverlap(RegToPrint, Reg));
@@ -364,8 +348,8 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
assert(NOps == 4);
OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
// cast away const; DIetc do not take const operands for some reason.
- DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata()));
- OS << V.getName();
+ OS << cast<DILocalVariable>(MI->getOperand(NOps - 2).getMetadata())
+ ->getName();
OS << " <- ";
// Frame address. Currently handles register +- offset only.
assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
@@ -452,15 +436,15 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Do any auto-generated pseudo lowerings.
- if (emitPseudoExpansionLowering(OutStreamer, MI))
+ if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
if (AArch64FI->getLOHRelated().count(MI)) {
// Generate a label for LOH related instruction
- MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
+ MCSymbol *LOHLabel = createTempSymbol("loh");
// Associate the instruction with the label
LOHInstToLabel[MI] = LOHLabel;
- OutStreamer.EmitLabel(LOHLabel);
+ OutStreamer->EmitLabel(LOHLabel);
}
// Do any manual lowerings.
@@ -468,11 +452,11 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
default:
break;
case AArch64::DBG_VALUE: {
- if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ if (isVerbose() && OutStreamer->hasRawTextSupport()) {
SmallString<128> TmpStr;
raw_svector_ostream OS(TmpStr);
PrintDebugValueComment(MI, OS);
- OutStreamer.EmitRawText(StringRef(OS.str()));
+ OutStreamer->EmitRawText(StringRef(OS.str()));
}
return;
}
@@ -483,8 +467,8 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case AArch64::TCRETURNri: {
MCInst TmpInst;
TmpInst.setOpcode(AArch64::BR);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case AArch64::TCRETURNdi: {
@@ -493,7 +477,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
TmpInst.setOpcode(AArch64::B);
TmpInst.addOperand(Dest);
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case AArch64::TLSDESC_CALLSEQ: {
@@ -516,52 +500,52 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst Adrp;
Adrp.setOpcode(AArch64::ADRP);
- Adrp.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Adrp.addOperand(MCOperand::createReg(AArch64::X0));
Adrp.addOperand(SymTLSDesc);
- EmitToStreamer(OutStreamer, Adrp);
+ EmitToStreamer(*OutStreamer, Adrp);
MCInst Ldr;
Ldr.setOpcode(AArch64::LDRXui);
- Ldr.addOperand(MCOperand::CreateReg(AArch64::X1));
- Ldr.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Ldr.addOperand(MCOperand::createReg(AArch64::X1));
+ Ldr.addOperand(MCOperand::createReg(AArch64::X0));
Ldr.addOperand(SymTLSDescLo12);
- Ldr.addOperand(MCOperand::CreateImm(0));
- EmitToStreamer(OutStreamer, Ldr);
+ Ldr.addOperand(MCOperand::createImm(0));
+ EmitToStreamer(*OutStreamer, Ldr);
MCInst Add;
Add.setOpcode(AArch64::ADDXri);
- Add.addOperand(MCOperand::CreateReg(AArch64::X0));
- Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(MCOperand::createReg(AArch64::X0));
+ Add.addOperand(MCOperand::createReg(AArch64::X0));
Add.addOperand(SymTLSDescLo12);
- Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0)));
- EmitToStreamer(OutStreamer, Add);
+ Add.addOperand(MCOperand::createImm(AArch64_AM::getShiftValue(0)));
+ EmitToStreamer(*OutStreamer, Add);
// Emit a relocation-annotation. This expands to no code, but requests
// the following instruction gets an R_AARCH64_TLSDESC_CALL.
MCInst TLSDescCall;
TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
TLSDescCall.addOperand(Sym);
- EmitToStreamer(OutStreamer, TLSDescCall);
+ EmitToStreamer(*OutStreamer, TLSDescCall);
MCInst Blr;
Blr.setOpcode(AArch64::BLR);
- Blr.addOperand(MCOperand::CreateReg(AArch64::X1));
- EmitToStreamer(OutStreamer, Blr);
+ Blr.addOperand(MCOperand::createReg(AArch64::X1));
+ EmitToStreamer(*OutStreamer, Blr);
return;
}
case TargetOpcode::STACKMAP:
- return LowerSTACKMAP(OutStreamer, SM, *MI);
+ return LowerSTACKMAP(*OutStreamer, SM, *MI);
case TargetOpcode::PATCHPOINT:
- return LowerPATCHPOINT(OutStreamer, SM, *MI);
+ return LowerPATCHPOINT(*OutStreamer, SM, *MI);
}
// Finally, do the automated lowerings for everything else.
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
}
// Force static initialization.
diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index e2b6367..d973234 100644
--- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -476,9 +476,7 @@ bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n");
- TII = (const AArch64InstrInfo *)MF->getTarget()
- .getSubtargetImpl()
- ->getInstrInfo();
+ TII = (const AArch64InstrInfo *)MF->getSubtarget().getInstrInfo();
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h
index baf80bc..1e2d1c3 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/lib/Target/AArch64/AArch64CallingConvention.h
@@ -46,7 +46,7 @@ static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
CCState &State, unsigned SlotAlign) {
unsigned Size = LocVT.getSizeInBits() / 8;
unsigned StackAlign = State.getMachineFunction()
- .getSubtarget()
+ .getTarget()
.getDataLayout()
->getStackAlignment();
unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index a391e76..4691e94 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -16,7 +16,7 @@ class CCIfAlign<string Align, CCAction A> :
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
/// CCIfBigEndian - Match only if we're in big endian mode.
class CCIfBigEndian<CCAction A> :
- CCIf<"State.getMachineFunction().getSubtarget().getDataLayout()->isBigEndian()", A>;
+ CCIf<"State.getMachineFunction().getTarget().getDataLayout()->isBigEndian()", A>;
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index ba4fc3b..06ff9af 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -92,9 +92,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I,
unsigned TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
- const AArch64TargetMachine *TM =
- static_cast<const AArch64TargetMachine *>(&MF->getTarget());
- const AArch64InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
// Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
// code sequence assumes the address will be.
@@ -112,9 +110,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
// inserting a copy instruction after I. Returns the new instruction.
MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
- const AArch64TargetMachine *TM =
- static_cast<const AArch64TargetMachine *>(&MF->getTarget());
- const AArch64InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
// Create a virtual register for the TLS base address.
MachineRegisterInfo &RegInfo = MF->getRegInfo();
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 87b545b..efdb2e3 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -279,18 +279,16 @@ static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
/// definition. It also consider definitions of ADRP instructions as uses and
/// ignore other uses. The ADRPMode is used to collect the information for LHO
/// that involve ADRP operation only.
-static void initReachingDef(MachineFunction &MF,
+static void initReachingDef(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
BlockToSetOfInstrsPerColor &ReachableUses,
const MapRegToId &RegToId,
const MachineInstr *DummyOp, bool ADRPMode) {
- const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
-
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned NbReg = RegToId.size();
- for (MachineBasicBlock &MBB : MF) {
+ for (const MachineBasicBlock &MBB : MF) {
auto &BBGen = Gen[&MBB];
BBGen = make_unique<const MachineInstr *[]>(NbReg);
std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
@@ -330,7 +328,7 @@ static void initReachingDef(MachineFunction &MF,
const uint32_t *PreservedRegs = MO.getRegMask();
// Set generated regs.
- for (const auto Entry : RegToId) {
+ for (const auto &Entry : RegToId) {
unsigned Reg = Entry.second;
// Use the global register ID when querying APIs external to this
// pass.
@@ -384,7 +382,7 @@ static void initReachingDef(MachineFunction &MF,
/// op.reachedUses
///
/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-static void reachingDefAlgorithm(MachineFunction &MF,
+static void reachingDefAlgorithm(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
BlockToSetOfInstrsPerColor &In,
BlockToSetOfInstrsPerColor &Out,
@@ -394,7 +392,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
bool HasChanged;
do {
HasChanged = false;
- for (MachineBasicBlock &MBB : MF) {
+ for (const MachineBasicBlock &MBB : MF) {
unsigned CurReg;
for (CurReg = 0; CurReg < NbReg; ++CurReg) {
SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
@@ -403,7 +401,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
unsigned Size = BBOutSet.size();
// In[bb][color] = U Out[bb.predecessors][color]
- for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+ for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
}
@@ -435,7 +433,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
/// @p DummyOp.
/// \pre ColorOpToReachedUses is an array of at least number of registers of
/// InstrToInstrs.
-static void reachingDef(MachineFunction &MF,
+static void reachingDef(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
const MapRegToId &RegToId, bool ADRPMode = false,
const MachineInstr *DummyOp = nullptr) {
@@ -985,7 +983,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
/// Look for every register defined by potential LOHs candidates.
/// Map these registers with dense id in @p RegToId and vice-versa in
/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
-static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
+static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
MapIdToReg &IdToReg,
const TargetRegisterInfo *TRI) {
unsigned CurRegId = 0;
@@ -1026,8 +1024,7 @@ static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
}
bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
MapRegToId RegToId;
@@ -1043,8 +1040,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *DummyOp = nullptr;
if (BasicBlockScopeOnly) {
- const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
// For local analysis, create a dummy operation to record uses that are not
// local.
DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 0fbd3c6..b9e41c6 100644
--- a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -67,6 +67,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -86,11 +87,12 @@ namespace {
class AArch64ConditionOptimizer : public MachineFunctionPass {
const TargetInstrInfo *TII;
MachineDominatorTree *DomTree;
+ const MachineRegisterInfo *MRI;
public:
// Stores immediate, compare instruction opcode and branch condition (in this
// order) of adjusted comparison.
- typedef std::tuple<int, int, AArch64CC::CondCode> CmpInfo;
+ typedef std::tuple<int, unsigned, AArch64CC::CondCode> CmpInfo;
static char ID;
AArch64ConditionOptimizer() : MachineFunctionPass(ID) {}
@@ -116,7 +118,6 @@ void initializeAArch64ConditionOptimizerPass(PassRegistry &);
INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizer, "aarch64-condopt",
"AArch64 CondOpt Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(AArch64ConditionOptimizer, "aarch64-condopt",
"AArch64 CondOpt Pass", false, false)
@@ -127,8 +128,6 @@ FunctionPass *llvm::createAArch64ConditionOptimizerPass() {
void AArch64ConditionOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -155,7 +154,7 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
// cmn is an alias for adds with a dead destination register.
case AArch64::ADDSWri:
case AArch64::ADDSXri:
- if (I->getOperand(0).isDead())
+ if (MRI->use_empty(I->getOperand(0).getReg()))
return I;
DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
@@ -216,7 +215,7 @@ static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) {
// operator and condition code.
AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
MachineInstr *CmpMI, AArch64CC::CondCode Cmp) {
- int Opc = CmpMI->getOpcode();
+ unsigned Opc = CmpMI->getOpcode();
// CMN (compare with negative immediate) is an alias to ADDS (as
// "operand - negative" == "operand + positive")
@@ -245,7 +244,7 @@ AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
const CmpInfo &Info) {
int Imm;
- int Opc;
+ unsigned Opc;
AArch64CC::CondCode Cmp;
std::tie(Imm, Opc, Cmp) = Info;
@@ -304,8 +303,9 @@ bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
<< "********** Function: " << MF.getName() << '\n');
- TII = MF.getTarget().getSubtargetImpl()->getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
+ MRI = &MF.getRegInfo();
bool Changed = false;
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 54f53dc..2b0c92f 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -416,7 +416,7 @@ bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
// We never speculate stores, so an AA pointer isn't necessary.
bool DontMoveAcrossStore = true;
- if (!I.isSafeToMove(TII, nullptr, DontMoveAcrossStore)) {
+ if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
DEBUG(dbgs() << "Can't speculate: " << I);
return false;
}
@@ -893,15 +893,13 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
<< "********** Function: " << MF.getName() << '\n');
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
- SchedModel =
- MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
+ SchedModel = MF.getSubtarget().getSchedModel();
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- MinSize = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize);
+ MinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
bool Changed = false;
CmpConv.runOnMachineFunction(MF);
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index c850680..c2470f7 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -229,7 +229,7 @@ static bool isStartChunk(uint64_t Chunk) {
if (Chunk == 0 || Chunk == UINT64_MAX)
return false;
- return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64;
+ return isMask_64(~Chunk);
}
/// \brief Check whether this chunk matches the pattern '0...1...' This pattern
@@ -239,7 +239,7 @@ static bool isEndChunk(uint64_t Chunk) {
if (Chunk == 0 || Chunk == UINT64_MAX)
return false;
- return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64;
+ return isMask_64(Chunk);
}
/// \brief Clear or set all bits in the chunk at the given index.
@@ -698,12 +698,15 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return expandMOVImm(MBB, MBBI, 32);
case AArch64::MOVi64imm:
return expandMOVImm(MBB, MBBI, 64);
- case AArch64::RET_ReallyLR:
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
- .addReg(AArch64::LR);
+ case AArch64::RET_ReallyLR: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
+ .addReg(AArch64::LR);
+ transferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
}
+ }
return false;
}
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index ca4e97b..9977e2b 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -245,9 +245,10 @@ public:
unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
- const TargetLibraryInfo *LibInfo)
+ const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ Subtarget =
+ &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
Context = &FuncInfo.Fn->getContext();
}
@@ -663,20 +664,22 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
Addr.setExtendType(AArch64_AM::LSL);
const Value *Src = U->getOperand(0);
- if (const auto *I = dyn_cast<Instruction>(Src))
- if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
- Src = I;
-
- // Fold the zext or sext when it won't become a noop.
- if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
- if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
- Addr.setExtendType(AArch64_AM::UXTW);
- Src = ZE->getOperand(0);
- }
- } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
- if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
- Addr.setExtendType(AArch64_AM::SXTW);
- Src = SE->getOperand(0);
+ if (const auto *I = dyn_cast<Instruction>(Src)) {
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
+ if (!isIntExtFree(ZE) &&
+ ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ Src = ZE->getOperand(0);
+ }
+ } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
+ if (!isIntExtFree(SE) &&
+ SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
+ }
}
}
@@ -745,21 +748,22 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
Addr.setExtendType(AArch64_AM::LSL);
const Value *Src = LHS;
- if (const auto *I = dyn_cast<Instruction>(Src))
- if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
- Src = I;
-
-
- // Fold the zext or sext when it won't become a noop.
- if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
- if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
- Addr.setExtendType(AArch64_AM::UXTW);
- Src = ZE->getOperand(0);
- }
- } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
- if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
- Addr.setExtendType(AArch64_AM::SXTW);
- Src = SE->getOperand(0);
+ if (const auto *I = dyn_cast<Instruction>(Src)) {
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
+ if (!isIntExtFree(ZE) &&
+ ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ Src = ZE->getOperand(0);
+ }
+ } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
+ if (!isIntExtFree(SE) &&
+ SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
+ }
}
}
@@ -1916,7 +1920,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
// could select it. Emit a copy to subreg if necessary. FastISel will remove
// it when it selects the integer extend.
unsigned Reg = lookUpRegForValue(IntExtVal);
- if (!Reg) {
+ auto *MI = MRI.getUniqueVRegDef(Reg);
+ if (!MI) {
if (RetVT == MVT::i64 && VT <= MVT::i32) {
if (WantZExt) {
// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
@@ -1934,10 +1939,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
// The integer extend has already been emitted - delete all the instructions
// that have been emitted by the integer extend lowering code and use the
// result from the load instruction directly.
- while (Reg) {
- auto *MI = MRI.getUniqueVRegDef(Reg);
- if (!MI)
- break;
+ while (MI) {
Reg = 0;
for (auto &Opnd : MI->uses()) {
if (Opnd.isReg()) {
@@ -1946,6 +1948,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
}
}
MI->eraseFromParent();
+ MI = nullptr;
+ if (Reg)
+ MI = MRI.getUniqueVRegDef(Reg);
}
updateValueMap(IntExtVal, ResultReg);
return true;
@@ -2571,7 +2576,7 @@ bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
Src1IsKill = true;
}
- unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32spRegClass, Src1Reg,
+ unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
Src1IsKill, Src2Reg, Src2IsKill);
updateValueMap(SI, ResultReg);
return true;
@@ -2677,8 +2682,11 @@ bool AArch64FastISel::selectSelect(const Instruction *I) {
return false;
bool CondIsKill = hasTrivialKill(Cond);
+ const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
+ CondReg = constrainOperandRegClass(II, CondReg, 1);
+
// Emit a TST instruction (ANDS wzr, reg, #imm).
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
AArch64::WZR)
.addReg(CondReg, getKillRegState(CondIsKill))
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
@@ -3033,6 +3041,11 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
// Copy all of the result registers out of their specified physreg.
MVT CopyVT = RVLocs[0].getValVT();
+
+ // TODO: Handle big-endian results
+ if (CopyVT.isVector() && !Subtarget->isLittleEndian())
+ return false;
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
@@ -3157,7 +3170,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
@@ -3256,7 +3269,7 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
std::swap(LHS, RHS);
// Simplify multiplies.
- unsigned IID = II->getIntrinsicID();
+ Intrinsic::ID IID = II->getIntrinsicID();
switch (IID) {
default:
break;
@@ -3324,8 +3337,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
MFI->setFrameAddressIsTaken(true);
const AArch64RegisterInfo *RegInfo =
- static_cast<const AArch64RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3525,7 +3537,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
std::swap(LHS, RHS);
// Simplify multiplies.
- unsigned IID = II->getIntrinsicID();
+ Intrinsic::ID IID = II->getIntrinsicID();
switch (IID) {
default:
break;
@@ -3589,7 +3601,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
AArch64_AM::ASR, 31, /*WantResult=*/false);
} else {
assert(VT == MVT::i64 && "Unexpected value type.");
- MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ // LHSReg and RHSReg cannot be killed by this Mul, since they are
+ // reused in the next instruction.
+ MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
+ /*IsKill=*/false);
unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
RHSReg, RHSIsKill);
emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
@@ -3618,7 +3633,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
AArch64::sub_32);
} else {
assert(VT == MVT::i64 && "Unexpected value type.");
- MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ // LHSReg and RHSReg cannot be killed by this Mul, since they are
+ // reused in the next instruction.
+ MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
+ /*IsKill=*/false);
unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
RHSReg, RHSIsKill);
emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
@@ -4563,7 +4581,7 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
unsigned ResultReg = 0;
uint64_t ShiftVal = C->getZExtValue();
MVT SrcVT = RetVT;
- bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
+ bool IsZExt = I->getOpcode() != Instruction::AShr;
const Value *Op0 = I->getOperand(0);
if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
if (!isIntExtFree(ZExt)) {
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index d8e9156..3ba7e70 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -9,6 +9,82 @@
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
+// On AArch64, stack frames are structured as follows:
+//
+// The stack grows downward.
+//
+// All of the individual frame areas on the frame below are optional, i.e. it's
+// possible to create a function so that the particular area isn't present
+// in the frame.
+//
+// At function entry, the "frame" looks as follows:
+//
+// | | Higher address
+// |-----------------------------------|
+// | |
+// | arguments passed on the stack |
+// | |
+// |-----------------------------------| <- sp
+// | | Lower address
+//
+//
+// After the prologue has run, the frame has the following general structure.
+// Note that this doesn't depict the case where a red-zone is used. Also,
+// technically the last frame area (VLAs) doesn't get created until in the
+// main function body, after the prologue is run. However, it's depicted here
+// for completeness.
+//
+// | | Higher address
+// |-----------------------------------|
+// | |
+// | arguments passed on the stack |
+// | |
+// |-----------------------------------|
+// | |
+// | prev_fp, prev_lr |
+// | (a.k.a. "frame record") |
+// |-----------------------------------| <- fp(=x29)
+// | |
+// | other callee-saved registers |
+// | |
+// |-----------------------------------|
+// |.empty.space.to.make.part.below....|
+// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
+// |.the.standard.16-byte.alignment....| compile time; if present)
+// |-----------------------------------|
+// | |
+// | local variables of fixed size |
+// | including spill slots |
+// |-----------------------------------| <- bp(not defined by ABI,
+// |.variable-sized.local.variables....| LLVM chooses X19)
+// |.(VLAs)............................| (size of this area is unknown at
+// |...................................| compile time)
+// |-----------------------------------| <- sp
+// | | Lower address
+//
+//
+// To access the data in a frame, at compile time, a constant offset must be
+// computable from one of the pointers (fp, bp, sp) to access it. The size
+// of the areas with a dotted background cannot be computed at compile-time
+// if they are present, making it required to have all three of fp, bp and
+// sp to be set up to be able to access all contents in the frame areas,
+// assuming all of the frame areas are non-empty.
+//
+// For most functions, some of the frame areas are empty. For those functions,
+// it may not be necessary to set up fp or bp:
+// * A base pointer is definitely needed when there are both VLAs and local
+// variables with more-than-default alignment requirements.
+// * A frame pointer is definitely needed when there are local variables with
+// more-than-default alignment requirements.
+//
+// In some cases when a base pointer is not strictly needed, it is generated
+// anyway when offsets from the frame pointer to access local variables become
+// so large that the offset can't be encoded in the immediate fields of loads
+// or stores.
+//
+// FIXME: also explain the redzone concept.
+// FIXME: also explain the concept of reserved call frames.
+//
//===----------------------------------------------------------------------===//
#include "AArch64FrameLowering.h"
@@ -39,33 +115,12 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- int Offset = 0;
- for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -FFI->getObjectOffset(i);
- if (FixedOff > Offset)
- Offset = FixedOff;
- }
- for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
- Offset += FFI->getObjectSize(i);
- unsigned Align = FFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset + Align - 1) / Align * Align;
- }
- // This does not include the 16 bytes used for fp and lr.
- return (unsigned)Offset;
-}
-
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
// Don't use the red zone if the function explicitly asks us not to.
// This is typically used for kernel code.
- if (MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::NoRedZone))
+ if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
return false;
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -84,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-#ifndef NDEBUG
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- assert(!RegInfo->needsStackRealignment(MF) &&
- "No stack realignment on AArch64!");
-#endif
-
return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
- MFI->hasPatchPoint());
+ MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF));
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -112,7 +161,7 @@ void AArch64FrameLowering::eliminateCallFramePseudoInstr(
const AArch64InstrInfo *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
DebugLoc DL = I->getDebugLoc();
- int Opc = I->getOpcode();
+ unsigned Opc = I->getOpcode();
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
@@ -167,7 +216,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
if (CSI.empty())
return;
- const DataLayout *TD = MF.getSubtarget().getDataLayout();
+ const DataLayout *TD = MF.getTarget().getDataLayout();
bool HasFP = hasFP(MF);
// Calculate amount of bytes used for return address storing.
@@ -201,8 +250,33 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
}
}
-void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+/// Get FPOffset by analyzing the first instruction.
+static int getFPOffsetInPrologue(MachineInstr *MBBI) {
+ // First instruction must a) allocate the stack and b) have an immediate
+ // that is a multiple of -2.
+ assert(((MBBI->getOpcode() == AArch64::STPXpre ||
+ MBBI->getOpcode() == AArch64::STPDpre) &&
+ MBBI->getOperand(3).getReg() == AArch64::SP &&
+ MBBI->getOperand(4).getImm() < 0 &&
+ (MBBI->getOperand(4).getImm() & 1) == 0));
+
+ // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
+ // required for the callee saved register area we get the frame pointer
+ // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
+ int FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
+ assert(FPOffset >= 0 && "Bad Framepointer Offset");
+ return FPOffset;
+}
+
+static bool isCSSave(MachineInstr *MBBI) {
+ return MBBI->getOpcode() == AArch64::STPXi ||
+ MBBI->getOpcode() == AArch64::STPDi ||
+ MBBI->getOpcode() == AArch64::STPXpre ||
+ MBBI->getOpcode() == AArch64::STPDpre;
+}
+
+void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
@@ -228,7 +302,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setLocalStackSize(NumBytes);
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
- MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
// REDZONE: If the stack size is less than 128 bytes, we don't need
// to actually allocate.
@@ -251,27 +325,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
// Only set up FP if we actually need to.
int FPOffset = 0;
- if (HasFP) {
- // First instruction must a) allocate the stack and b) have an immediate
- // that is a multiple of -2.
- assert((MBBI->getOpcode() == AArch64::STPXpre ||
- MBBI->getOpcode() == AArch64::STPDpre) &&
- MBBI->getOperand(3).getReg() == AArch64::SP &&
- MBBI->getOperand(4).getImm() < 0 &&
- (MBBI->getOperand(4).getImm() & 1) == 0);
-
- // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
- // required for the callee saved register area we get the frame pointer
- // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
- FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
- assert(FPOffset >= 0 && "Bad Framepointer Offset");
- }
+ if (HasFP)
+ FPOffset = getFPOffsetInPrologue(MBBI);
// Move past the saves of the callee-saved registers.
- while (MBBI->getOpcode() == AArch64::STPXi ||
- MBBI->getOpcode() == AArch64::STPDi ||
- MBBI->getOpcode() == AArch64::STPXpre ||
- MBBI->getOpcode() == AArch64::STPDpre) {
+ while (isCSSave(MBBI)) {
++MBBI;
NumBytes -= 16;
}
@@ -289,11 +347,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setLocalStackSize(NumBytes);
// Allocate space for the rest of the frame.
- if (NumBytes) {
- // If we're a leaf function, try using the red zone.
- if (!canUseRedZone(MF))
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+
+ const unsigned Alignment = MFI->getMaxAlignment();
+ const bool NeedsRealignment = (Alignment > 16);
+ unsigned scratchSPReg = AArch64::SP;
+ if (NeedsRealignment) {
+ // Use X9, a caller-saved temporary register, as a scratch register
+ assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) &&
+ "No scratch register to align SP!");
+ scratchSPReg = AArch64::X9;
+ }
+
+ // If we're a leaf function, try using the red zone.
+ if (NumBytes && !canUseRedZone(MF))
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
+ MachineInstr::FrameSetup);
+
+ assert(!(NeedsRealignment && NumBytes==0) &&
+ "NumBytes should never be 0 when realignment is needed");
+
+ if (NumBytes && NeedsRealignment) {
+ const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+ assert(NrBitsToZero > 1);
+ assert(scratchSPReg != AArch64::SP);
+
+ // SUB X9, SP, NumBytes
+ // -- X9 is a temporary register, so shouldn't contain any live data here,
+ // -- free to use. This is already produced by emitFrameOffset above.
+ // AND SP, X9, 0b11111...0000
+ // The logical immediates have a non-trivial encoding. The following
+ // formula computes the encoded immediate with all ones but
+ // NrBitsToZero zero bits as least significant bits.
+ uint32_t andMaskEncoded =
+ (1 <<12) // = N
+ | ((64-NrBitsToZero) << 6) // immr
+ | ((64-NrBitsToZero-1) << 0) // imms
+ ;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(scratchSPReg, RegState::Kill)
+ .addImm(andMaskEncoded);
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -303,15 +398,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
// FIXME: Clarify FrameSetup flags here.
// Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
// needed.
- //
- if (RegInfo->hasBasePointer(MF))
- TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
+ if (RegInfo->hasBasePointer(MF)) {
+ TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
+ false);
+ }
if (needsFrameMoves) {
- const DataLayout *TD = MF.getSubtarget().getDataLayout();
+ const DataLayout *TD = MF.getTarget().getDataLayout();
const int StackGrowth = -TD->getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
-
// An example of the prologue:
//
// .globl __foo
@@ -444,15 +539,19 @@ static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
MachineFrameInfo *MFI = MF.getFrameInfo();
const AArch64InstrInfo *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
- DebugLoc DL = MBBI->getDebugLoc();
- unsigned RetOpcode = MBBI->getOpcode();
-
+ DebugLoc DL;
+ bool IsTailCallReturn = false;
+ if (MBB.end() != MBBI) {
+ DL = MBBI->getDebugLoc();
+ unsigned RetOpcode = MBBI->getOpcode();
+ IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
+ RetOpcode == AArch64::TCRETURNri;
+ }
int NumBytes = MFI->getStackSize();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -461,10 +560,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
- // Initial and residual are named for consitency with the prologue. Note that
+ // Initial and residual are named for consistency with the prologue. Note that
// in the epilogue, the residual adjustment is executed first.
uint64_t ArgumentPopSize = 0;
- if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
+ if (IsTailCallReturn) {
MachineOperand &StackAdjust = MBBI->getOperand(1);
// For a tail-call in a callee-pops-arguments environment, some or all of
@@ -509,7 +608,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned NumRestores = 0;
// Move past the restores of the callee-saved registers.
- MachineBasicBlock::iterator LastPopI = MBBI;
+ MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
if (LastPopI != MBB.begin()) {
do {
@@ -572,9 +671,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
bool isFixed = MFI->isFixedObjectIndex(FI);
// Use frame pointer to reference fixed objects. Use it for locals if
- // there are VLAs (and thus the SP isn't reliable as a base).
- // Make sure useFPForScavengingIndex() does the right thing for the emergency
- // spill slot.
+ // there are VLAs or a dynamically realigned SP (and thus the SP isn't
+ // reliable as a base). Make sure useFPForScavengingIndex() does the
+ // right thing for the emergency spill slot.
bool UseFP = false;
if (AFI->hasStackFrame()) {
// Note: Keeping the following as multiple 'if' statements rather than
@@ -583,7 +682,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// Argument access should always use the FP.
if (isFixed) {
UseFP = hasFP(MF);
- } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
+ } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
+ !RegInfo->needsStackRealignment(MF)) {
// Use SP or FP, whichever gives us the best chance of the offset
// being in range for direct access. If the FPOffset is positive,
// that'll always be best, as the SP will be even further away.
@@ -599,6 +699,10 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
}
}
+ assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
+ "In the presence of dynamic stack pointer realignment, "
+ "non-argument objects cannot be accessed through the frame pointer");
+
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
@@ -696,6 +800,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
MIB.addReg(AArch64::SP, RegState::Define);
+ MBB.addLiveIn(Reg1);
+ MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
@@ -795,6 +901,9 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
if (RegInfo->hasBasePointer(MF))
MRI->setPhysRegUsed(RegInfo->getBaseRegister());
+ if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
+ MRI->setPhysRegUsed(AArch64::X9);
+
// If any callee-saved registers are used, the frame cannot be eliminated.
unsigned NumGPRSpilled = 0;
unsigned NumFPRSpilled = 0;
@@ -868,7 +977,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+ unsigned CFSize =
+ MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index df3875f..b496fcc 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -22,7 +22,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
: TargetFrameLowering(StackGrowsDown, 16, 0, 16,
- false /*StackRealignable*/) {}
+ true /*StackRealignable*/) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -34,7 +34,7 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index bb2e1e2..78a2021 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -53,12 +53,10 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
- AttributeSet FnAttrs = MF.getFunction()->getAttributes();
ForCodeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
+ MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+ Subtarget = &MF.getSubtarget<AArch64Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -67,7 +65,7 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
SDNode *SelectMLAV64LaneV128(SDNode *N);
@@ -134,8 +132,8 @@ public:
/// Generic helper for the createDTuple/createQTuple
/// functions. Those should almost always be called instead.
- SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
- unsigned SubRegs[]);
+ SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
+ const unsigned SubRegs[]);
SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
@@ -213,13 +211,20 @@ static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
}
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
- // Require the address to be in a register. That is safe for all AArch64
- // variants and it is hard to do anything much smarter without knowing
- // how the operand is used.
- OutOps.push_back(Op);
- return false;
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_Q:
+ // Require the address to be in a register. That is safe for all AArch64
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+ }
+ return true;
}
/// SelectArithImmed - Select an immediate value that can be represented as
@@ -247,8 +252,9 @@ bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
return false;
unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
- Val = CurDAG->getTargetConstant(Immed, MVT::i32);
- Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
+ SDLoc dl(N);
+ Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
+ Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
return true;
}
@@ -281,7 +287,8 @@ bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
return false;
Immed &= 0xFFFFFFULL;
- return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
+ return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
+ Shift);
}
/// getShiftTypeForNode - Translate a shift node to the corresponding
@@ -301,7 +308,7 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
}
}
-/// \brief Determine wether it is worth to fold V into an extended register.
+/// \brief Determine whether it is worth to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
// it hurts if the value is used at least twice, unless we are optimizing
// for code size.
@@ -329,7 +336,7 @@ bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
Reg = N.getOperand(0);
- Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
+ Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
return isWorthFolding(N);
}
@@ -430,6 +437,7 @@ static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
/// is a lane in the upper half of a 128-bit vector. Recognize and select this
/// so that we don't emit unnecessary lane extracts.
SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
+ SDLoc dl(N);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
@@ -446,7 +454,7 @@ SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
return nullptr;
}
- SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
+ SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
@@ -469,10 +477,11 @@ SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
break;
}
- return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
+ return CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops);
}
SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
+ SDLoc dl(N);
SDValue SMULLOp0;
SDValue SMULLOp1;
int LaneIdx;
@@ -481,7 +490,7 @@ SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
LaneIdx))
return nullptr;
- SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
+ SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
@@ -512,7 +521,7 @@ SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
} else
llvm_unreachable("Unrecognized intrinsic.");
- return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
+ return CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops);
}
/// Instructions that accept extend modifiers like UXTW expect the register
@@ -523,9 +532,10 @@ static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
if (N.getValueType() == MVT::i32)
return N;
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDLoc dl(N);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- SDLoc(N), MVT::i32, N, SubReg);
+ dl, MVT::i32, N, SubReg);
return SDValue(Node, 0);
}
@@ -565,7 +575,8 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
// (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
Reg = narrowIfNeeded(CurDAG, Reg);
- Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32);
+ Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
+ MVT::i32);
return isWorthFolding(N);
}
@@ -595,11 +606,12 @@ static bool isWorthFoldingADDlow(SDValue N) {
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
SDValue &Base, SDValue &OffImm) {
+ SDLoc dl(N);
const TargetLowering *TLI = getTargetLowering();
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- OffImm = CurDAG->getTargetConstant(0, MVT::i64);
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
return true;
}
@@ -632,7 +644,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
- OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64);
+ OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
return true;
}
}
@@ -648,7 +660,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
// add x0, Xbase, #offset
// ldr x0, [x0]
Base = N;
- OffImm = CurDAG->getTargetConstant(0, MVT::i64);
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
return true;
}
@@ -675,7 +687,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
const TargetLowering *TLI = getTargetLowering();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64);
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
return true;
}
}
@@ -683,12 +695,12 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
}
static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDLoc dl(N);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
SDValue ImpDef = SDValue(
- CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64),
- 0);
+ CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
MachineSDNode *Node = CurDAG->getMachineNode(
- TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg);
+ TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
return SDValue(Node, 0);
}
@@ -702,6 +714,7 @@ bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
return false;
+ SDLoc dl(N);
if (WantExtend) {
AArch64_AM::ShiftExtendType Ext =
getExtendTypeForNode(N.getOperand(0), true);
@@ -709,10 +722,11 @@ bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
return false;
Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
- SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
+ MVT::i32);
} else {
Offset = N.getOperand(0);
- SignExtend = CurDAG->getTargetConstant(0, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
}
unsigned LegalShiftVal = Log2_32(Size);
@@ -735,6 +749,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
return false;
SDValue LHS = N.getOperand(0);
SDValue RHS = N.getOperand(1);
+ SDLoc dl(N);
// We don't want to match immediate adds here, because they are better lowered
// to the register-immediate addressing modes.
@@ -757,7 +772,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
Base = LHS;
- DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
return true;
}
@@ -765,12 +780,12 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
Base = RHS;
- DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
return true;
}
// There was no shift, whatever else we find.
- DoShift = CurDAG->getTargetConstant(false, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
// Try to match an unshifted extend on the LHS.
@@ -779,7 +794,8 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
AArch64_AM::InvalidShiftExtend) {
Base = RHS;
Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
- SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
+ MVT::i32);
if (isWorthFolding(LHS))
return true;
}
@@ -790,7 +806,8 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
AArch64_AM::InvalidShiftExtend) {
Base = LHS;
Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
- SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
+ MVT::i32);
if (isWorthFolding(RHS))
return true;
}
@@ -821,6 +838,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
return false;
SDValue LHS = N.getOperand(0);
SDValue RHS = N.getOperand(1);
+ SDLoc DL(N);
// Check if this particular node is reused in any non-memory related
// operation. If yes, do not try to fold this node into the address
@@ -843,7 +861,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
// MOV X0, WideImmediate
// LDR X2, [BaseReg, X0]
if (isa<ConstantSDNode>(RHS)) {
- int64_t ImmOff = (int64_t)dyn_cast<ConstantSDNode>(RHS)->getZExtValue();
+ int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
unsigned Scale = Log2_32(Size);
// Skip the immediate can be seleced by load/store addressing mode.
// Also skip the immediate can be encoded by a single ADD (SUB is also
@@ -852,7 +870,6 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
return false;
- SDLoc DL(N.getNode());
SDValue Ops[] = { RHS };
SDNode *MOVI =
CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
@@ -868,7 +885,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
Base = LHS;
- DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
return true;
}
@@ -876,40 +893,40 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
Base = RHS;
- DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
return true;
}
// Match any non-shifted, non-extend, non-immediate add expression.
Base = LHS;
Offset = RHS;
- SignExtend = CurDAG->getTargetConstant(false, MVT::i32);
- DoShift = CurDAG->getTargetConstant(false, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
// Reg1 + Reg2 is free: no check needed.
return true;
}
SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
- static unsigned RegClassIDs[] = {
+ static const unsigned RegClassIDs[] = {
AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
- static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1,
- AArch64::dsub2, AArch64::dsub3 };
+ static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
+ AArch64::dsub2, AArch64::dsub3};
return createTuple(Regs, RegClassIDs, SubRegs);
}
SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
- static unsigned RegClassIDs[] = {
+ static const unsigned RegClassIDs[] = {
AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
- static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1,
- AArch64::qsub2, AArch64::qsub3 };
+ static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3};
return createTuple(Regs, RegClassIDs, SubRegs);
}
SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
- unsigned RegClassIDs[],
- unsigned SubRegs[]) {
+ const unsigned RegClassIDs[],
+ const unsigned SubRegs[]) {
// There's no special register-class for a vector-list of 1 element: it's just
// a vector.
if (Regs.size() == 1)
@@ -917,18 +934,18 @@ SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
assert(Regs.size() >= 2 && Regs.size() <= 4);
- SDLoc DL(Regs[0].getNode());
+ SDLoc DL(Regs[0]);
SmallVector<SDValue, 4> Ops;
// First operand of REG_SEQUENCE is the desired RegClass.
Ops.push_back(
- CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
+ CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
// Then we get pairs of source & subregister-position for the components.
for (unsigned i = 0; i < Regs.size(); ++i) {
Ops.push_back(Regs[i]);
- Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
}
SDNode *N =
@@ -1025,19 +1042,21 @@ SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
SDValue Base = LD->getBasePtr();
ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
int OffsetVal = (int)OffsetOp->getZExtValue();
- SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
+ SDLoc dl(N);
+ SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
SDValue Ops[] = { Base, Offset, Chain };
- SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT,
+ SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
MVT::Other, Ops);
// Either way, we're replacing the node, so tell the caller that.
Done = true;
SDValue LoadedVal = SDValue(Res, 1);
if (InsertTo64) {
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
LoadedVal =
SDValue(CurDAG->getMachineNode(
- AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
- CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg),
+ AArch64::SUBREG_TO_REG, dl, MVT::i64,
+ CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
+ SubReg),
0);
}
@@ -1054,13 +1073,10 @@ SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
EVT VT = N->getValueType(0);
SDValue Chain = N->getOperand(0);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(2)); // Mem operand;
- Ops.push_back(Chain);
+ SDValue Ops[] = {N->getOperand(2), // Mem operand;
+ Chain};
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
SDValue SuperReg = SDValue(Ld, 0);
@@ -1078,15 +1094,12 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
EVT VT = N->getValueType(0);
SDValue Chain = N->getOperand(0);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(1)); // Mem operand
- Ops.push_back(N->getOperand(2)); // Incremental
- Ops.push_back(Chain);
+ SDValue Ops[] = {N->getOperand(1), // Mem operand
+ N->getOperand(2), // Incremental
+ Chain};
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
@@ -1117,10 +1130,7 @@ SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(NumVecs + 2));
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
return St;
@@ -1130,25 +1140,24 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {
SDLoc dl(N);
EVT VT = N->getOperand(2)->getValueType(0);
- SmallVector<EVT, 2> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Other); // Type for the Chain
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other}; // Type for the Chain
// Form a REG_SEQUENCE to force register allocation.
bool Is128Bit = VT.getSizeInBits() == 128;
SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(NumVecs + 1)); // base register
- Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental
- Ops.push_back(N->getOperand(0)); // Chain
+ SDValue Ops[] = {RegSeq,
+ N->getOperand(NumVecs + 1), // base register
+ N->getOperand(NumVecs + 2), // Incremental
+ N->getOperand(0)}; // Chain
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
return St;
}
+namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
@@ -1169,6 +1178,7 @@ public:
return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
}
};
+} // namespace
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
@@ -1197,18 +1207,13 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 3));
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
+ N->getOperand(NumVecs + 3), N->getOperand(0)};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
SDValue SuperReg = SDValue(Ld, 0);
@@ -1242,20 +1247,18 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ RegSeq->getValueType(0), MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number
- Ops.push_back(N->getOperand(NumVecs + 2)); // Base register
- Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq,
+ CurDAG->getTargetConstant(LaneNo, dl,
+ MVT::i64), // Lane Number
+ N->getOperand(NumVecs + 2), // Base register
+ N->getOperand(NumVecs + 3), // Incremental
+ N->getOperand(0)};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Update uses of the write back register
@@ -1303,11 +1306,8 @@ SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 3));
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
+ N->getOperand(NumVecs + 3), N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
// Transfer memoperands.
@@ -1333,19 +1333,16 @@ SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- SmallVector<EVT, 2> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register
- Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
+ N->getOperand(NumVecs + 2), // Base Register
+ N->getOperand(NumVecs + 3), // Incremental
+ N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
@@ -1424,12 +1421,17 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
} else
return false;
- assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
- "bad amount in shift node!");
+ // Bail out on large immediates. This happens when no proper
+ // combining/constant folding was performed.
+ if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) {
+ DEBUG((dbgs() << N
+ << ": Found large shift immediate, this should not happen\n"));
+ return false;
+ }
LSB = Srl_imm;
- MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm)
- : CountTrailingOnes_64(And_imm)) -
+ MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm)
+ : countTrailingOnes<uint64_t>(And_imm)) -
1;
if (ClampMSB)
// Since we're moving the extend before the right shift operation, we need
@@ -1473,7 +1475,7 @@ static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
return false;
// Check whether we really have several bits extract here.
- unsigned BitWide = 64 - CountLeadingOnes_64(~(And_mask >> Srl_imm));
+ unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm));
if (BitWide && isMask_64(And_mask >> Srl_imm)) {
if (N->getValueType(0) == MVT::i32)
Opc = AArch64::UBFMWri;
@@ -1529,7 +1531,14 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
} else
return false;
- assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
+ // Missing combines/constant folding may have left us with strange
+ // constants.
+ if (Shl_imm >= VT.getSizeInBits()) {
+ DEBUG((dbgs() << N
+ << ": Found large shift immediate, this should not happen\n"));
+ return false;
+ }
+
uint64_t Srl_imm = 0;
if (!isIntImmediate(N->getOperand(1), Srl_imm))
return false;
@@ -1596,23 +1605,24 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
return nullptr;
EVT VT = N->getValueType(0);
+ SDLoc dl(N);
// If the bit extract operation is 64bit but the original type is 32bit, we
// need to add one EXTRACT_SUBREG.
if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
- SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64),
- CurDAG->getTargetConstant(MSB, MVT::i64)};
+ SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, MVT::i64),
+ CurDAG->getTargetConstant(MSB, dl, MVT::i64)};
- SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64);
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
MachineSDNode *Node =
- CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32,
+ CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32,
SDValue(BFM, 0), SubReg);
return Node;
}
- SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT),
- CurDAG->getTargetConstant(MSB, VT)};
+ SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, VT),
+ CurDAG->getTargetConstant(MSB, dl, VT)};
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
@@ -1816,6 +1826,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
return Op;
EVT VT = Op.getValueType();
+ SDLoc dl(Op);
unsigned BitWidth = VT.getSizeInBits();
unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
@@ -1823,16 +1834,16 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
if (ShlAmount > 0) {
// LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
ShiftNode = CurDAG->getMachineNode(
- UBFMOpc, SDLoc(Op), VT, Op,
- CurDAG->getTargetConstant(BitWidth - ShlAmount, VT),
- CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT));
+ UBFMOpc, dl, VT, Op,
+ CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
+ CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
} else {
// LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
assert(ShlAmount < 0 && "expected right shift");
int ShrAmount = -ShlAmount;
ShiftNode = CurDAG->getMachineNode(
- UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT),
- CurDAG->getTargetConstant(BitWidth - 1, VT));
+ UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
+ CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
}
return SDValue(ShiftNode, 0);
@@ -1872,7 +1883,7 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
return false;
ShiftAmount = countTrailingZeros(NonZeroBits);
- MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount);
+ MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
// BFI encompasses sufficiently many nodes that it's worth inserting an extra
// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
@@ -1997,10 +2008,11 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
return nullptr;
EVT VT = N->getValueType(0);
+ SDLoc dl(N);
SDValue Ops[] = { Opd0,
Opd1,
- CurDAG->getTargetConstant(LSB, VT),
- CurDAG->getTargetConstant(MSB, VT) };
+ CurDAG->getTargetConstant(LSB, dl, VT),
+ CurDAG->getTargetConstant(MSB, dl, VT) };
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
@@ -2098,7 +2110,7 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
// finding FBits, but it must still be in range.
if (FBits == 0 || FBits > RegWidth) return false;
- FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32);
+ FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
return true;
}
@@ -2213,8 +2225,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
const TargetLowering *TLI = getTargetLowering();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
- CurDAG->getTargetConstant(Shifter, MVT::i32) };
+ SDLoc DL(Node);
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
+ CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
}
case ISD::INTRINSIC_W_CHAIN: {
@@ -2250,11 +2263,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
SDValue MemAddr = Node->getOperand(4);
// Place arguments in the right order.
- SmallVector<SDValue, 7> Ops;
- Ops.push_back(ValLo);
- Ops.push_back(ValHi);
- Ops.push_back(MemAddr);
- Ops.push_back(Chain);
+ SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
// Transfer memoperands.
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6458d56..e6108c3 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
@@ -75,10 +76,9 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
-
-AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
- : TargetLowering(TM) {
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
+ const AArch64Subtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
@@ -120,7 +120,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
}
// Compute derived properties from the register classes
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget->getRegisterInfo());
// Provide all sorts of operation actions
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
@@ -282,14 +282,39 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
- // f16 is storage-only, so we promote operations to f32 if we know this is
- // valid, and ignore them otherwise. The operations not mentioned here will
- // fail to select, but this is not a major problem as no source language
- // should be emitting native f16 operations yet.
- setOperationAction(ISD::FADD, MVT::f16, Promote);
- setOperationAction(ISD::FDIV, MVT::f16, Promote);
- setOperationAction(ISD::FMUL, MVT::f16, Promote);
- setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ // f16 is a storage-only type, always promote it to f32.
+ setOperationAction(ISD::SETCC, MVT::f16, Promote);
+ setOperationAction(ISD::BR_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT, MVT::f16, Promote);
+ setOperationAction(ISD::FADD, MVT::f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FMA, MVT::f16, Promote);
+ setOperationAction(ISD::FNEG, MVT::f16, Promote);
+ setOperationAction(ISD::FABS, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSQRT, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
// v4f16 is also a storage-only type, so promote it to v4f32 when that is
// known to be safe.
@@ -371,9 +396,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
// AArch64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingTypes[] = { MVT::f32, MVT::f64};
- for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
- MVT Ty = RoundingTypes[I];
+ for (MVT Ty : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -469,6 +492,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::VSELECT);
+ setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
@@ -484,6 +508,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
// Enable TBZ/TBNZ
MaskAndBranchFoldingIsLegal = true;
+ EnableExtLdPromotion = true;
setMinFunctionAlignment(2);
@@ -534,11 +559,21 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
+ // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
+ // -> v8f16 conversions.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
+ // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
+ // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
@@ -570,9 +605,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
}
// AArch64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 };
- for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) {
- MVT Ty = RoundingVecTypes[I];
+ for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -647,6 +680,12 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+ // [SU][MIN|MAX] are available for all NEON types apart from i64.
+ if (!VT.isFloatingPoint() &&
+ VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ setOperationAction(Opcode, VT.getSimpleVT(), Legal);
+
if (Subtarget->isLittleEndian()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
@@ -739,13 +778,6 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
return MVT::i64;
}
-unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
- // FIXME: On AArch64, this depends on the type.
- // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
- // and the offset has to be a multiple of the related size in bytes.
- return 4095;
-}
-
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
@@ -753,9 +785,8 @@ AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
}
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default:
- return nullptr;
+ switch ((AArch64ISD::NodeType)Opcode) {
+ case AArch64ISD::FIRST_NUMBER: break;
case AArch64ISD::CALL: return "AArch64ISD::CALL";
case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
@@ -827,6 +858,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
+ case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
+ case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
+ case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
+ case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
+ case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
+ case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
case AArch64ISD::NOT: return "AArch64ISD::NOT";
case AArch64ISD::BIT: return "AArch64ISD::BIT";
case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
@@ -834,6 +871,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
+ case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
@@ -869,6 +907,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
}
+ return nullptr;
}
MachineBasicBlock *
@@ -886,9 +925,8 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
// EndBB:
// Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI->getDebugLoc();
MachineFunction::iterator It = MBB;
@@ -1151,7 +1189,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
- RHS = DAG.getConstant(C, VT);
+ RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULT:
@@ -1161,7 +1199,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
(VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
- RHS = DAG.getConstant(C, VT);
+ RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETLE:
@@ -1172,7 +1210,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
- RHS = DAG.getConstant(C, VT);
+ RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULE:
@@ -1183,7 +1221,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
- RHS = DAG.getConstant(C, VT);
+ RHS = DAG.getConstant(C, dl, VT);
}
break;
}
@@ -1217,10 +1255,11 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
DAG.getValueType(MVT::i16));
Cmp = emitComparison(SExt,
- DAG.getConstant(ValueofRHS, RHS.getValueType()),
+ DAG.getConstant(ValueofRHS, dl,
+ RHS.getValueType()),
CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+ AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
return Cmp;
}
}
@@ -1228,7 +1267,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+ AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
return Cmp;
}
@@ -1264,7 +1303,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
case ISD::SMULO:
case ISD::UMULO: {
CC = AArch64CC::NE;
- bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false;
+ bool IsSigned = Op.getOpcode() == ISD::SMULO;
if (Op.getValueType() == MVT::i32) {
unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// For a 32 bit multiply with overflow check we want the instruction
@@ -1275,7 +1314,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
// On AArch64 the upper 32 bits are always zero extended for a 32 bit
// operation. We need to clear out the upper 32 bits, because we used a
// widening multiply that wrote all 64 bits. In the end this should be a
@@ -1288,10 +1327,10 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
// check we have to arithmetic shift right the 32nd bit of the result by
// 31 bits. Then we compare the result to the upper 32 bits.
SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, DL, MVT::i64));
UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
- DAG.getConstant(31, MVT::i64));
+ DAG.getConstant(31, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
@@ -1304,10 +1343,11 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
// pattern:
// (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, DL, MVT::i64));
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow =
- DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
+ DAG.getNode(AArch64ISD::SUBS, DL, VTs,
+ DAG.getConstant(0, DL, MVT::i64),
UpperBits).getValue(1);
}
break;
@@ -1318,7 +1358,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
if (IsSigned) {
SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
- DAG.getConstant(63, MVT::i64));
+ DAG.getConstant(63, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
@@ -1328,7 +1368,8 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow =
- DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
+ DAG.getNode(AArch64ISD::SUBS, DL, VTs,
+ DAG.getConstant(0, DL, MVT::i64),
UpperBits).getValue(1);
}
break;
@@ -1347,10 +1388,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
RTLIB::Libcall Call) const {
- SmallVector<SDValue, 2> Ops;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
- Ops.push_back(Op.getOperand(i));
-
+ SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false,
SDLoc(Op)).first;
}
@@ -1405,7 +1443,7 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
FVal = Other;
TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
- DAG.getConstant(-1ULL, Other.getValueType()));
+ DAG.getConstant(-1ULL, dl, Other.getValueType()));
return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
CCVal, Cmp);
@@ -1455,24 +1493,25 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
return SDValue();
+ SDLoc dl(Op);
AArch64CC::CondCode CC;
// The actual operation that sets the overflow or carry flag.
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
// We use 0 and 1 as false and true values.
- SDValue TVal = DAG.getConstant(1, MVT::i32);
- SDValue FVal = DAG.getConstant(0, MVT::i32);
+ SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
// We use an inverted condition, because the conditional select is inverted
// too. This will allow it to be selected to a single instruction:
// CSINC Wd, WZR, WZR, invert(cond).
- SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32);
- Overflow = DAG.getNode(AArch64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal,
+ SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
+ Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
CCVal, Overflow);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
// Prefetch operands are:
@@ -1503,7 +1542,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
(Locality << 1) | // Cache level bits
(unsigned)IsStream; // Stream bit
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
- DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1));
+ DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
@@ -1567,6 +1606,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
if (Op.getOperand(0).getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
+ // f16 conversions are promoted to f32.
+ if (Op.getOperand(0).getValueType() == MVT::f16) {
+ SDLoc dl(Op);
+ return DAG.getNode(
+ Op.getOpcode(), dl, Op.getValueType(),
+ DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
+ }
+
if (Op.getOperand(0).getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
@@ -1578,10 +1625,7 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
else
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
- SmallVector<SDValue, 2> Ops;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
- Ops.push_back(Op.getOperand(i));
-
+ SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false,
SDLoc(Op)).first;
}
@@ -1600,7 +1644,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
- return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
}
if (VT.getSizeInBits() > InVT.getSizeInBits()) {
@@ -1619,6 +1663,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
+ // f16 conversions are promoted to f32.
+ if (Op.getValueType() == MVT::f16) {
+ SDLoc dl(Op);
+ return DAG.getNode(
+ ISD::FP_ROUND, dl, MVT::f16,
+ DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
+ DAG.getIntPtrConstant(0, dl));
+ }
+
// i128 conversions are libcalls.
if (Op.getOperand(0).getValueType() == MVT::i128)
return SDValue();
@@ -1679,7 +1732,7 @@ static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
return SDValue(
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
- DAG.getTargetConstant(AArch64::hsub, MVT::i32)),
+ DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
}
@@ -1753,6 +1806,7 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
+ SDLoc dl(N);
unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
@@ -1762,9 +1816,9 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
- Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
+ Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+ return DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::getVectorVT(TruncVT, NumElts), Ops);
}
@@ -2219,8 +2273,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7 };
static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
- unsigned FirstVariadicGPR =
- CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs);
+ unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
@@ -2237,7 +2290,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
MachinePointerInfo::getStack(i * 8), false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(8, getPointerTy()));
+ DAG.getConstant(8, DL, getPointerTy()));
}
}
FuncInfo->setVarArgsGPRIndex(GPRIdx);
@@ -2248,8 +2301,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
- unsigned FirstVariadicFPR =
- CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs);
+ unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
@@ -2267,7 +2319,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
MachinePointerInfo::getStack(i * 16), false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(16, getPointerTy()));
+ DAG.getConstant(16, DL, getPointerTy()));
}
}
FuncInfo->setVarArgsFPRIndex(FPRIdx);
@@ -2619,8 +2671,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
- Chain =
- DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL);
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, DL,
+ true),
+ DL);
SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy());
@@ -2690,7 +2743,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset + BEAlign;
- SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
if (IsTailCall) {
@@ -2705,7 +2758,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// clobbered.
Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
} else {
- SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
DstAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
DstInfo = MachinePointerInfo::getStack(LocMemOffset);
@@ -2713,11 +2766,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (Outs[i].Flags.isByVal()) {
SDValue SizeNode =
- DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64);
+ DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
SDValue Cpy = DAG.getMemcpy(
Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
- /*isVol = */ false,
- /*AlwaysInline = */ false, DstInfo, MachinePointerInfo());
+ /*isVol = */ false, /*AlwaysInline = */ false,
+ /*isTailCall = */ false,
+ DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
@@ -2782,8 +2836,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, DL);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
+ DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
}
@@ -2795,7 +2849,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Each tail call may have to adjust the stack by a different amount, so
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
- Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
+ Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
}
// Add argument registers to the end of the list so that they are known live
@@ -2806,19 +2860,16 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (IsThisReturn) {
// For 'this' returns, use the X0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
+ Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
IsThisReturn = false;
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(MF, CallConv);
}
} else
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -2830,8 +2881,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// If we're doing a tall call, use a TC_RETURN here rather than an
// actual call instruction.
- if (IsTailCall)
+ if (IsTailCall) {
+ MF.getFrameInfo()->setHasTailCall();
return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
+ }
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
@@ -2841,8 +2894,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
? RoundUpToAlignment(NumBytes, 16)
: 0;
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(CalleePopBytes, true),
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
+ DAG.getIntPtrConstant(CalleePopBytes, DL, true),
InFlag, DL);
if (!Ins.empty())
InFlag = Chain.getValue(1);
@@ -2958,7 +3011,7 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
/*isInvariant=*/ true, 8);
if (GN->getOffset() != 0)
return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr,
- DAG.getConstant(GN->getOffset(), PtrVT));
+ DAG.getConstant(GN->getOffset(), DL, PtrVT));
return GlobalAddr;
}
@@ -3038,11 +3091,8 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
- const uint32_t *Mask = ARI->getTLSCallPreservedMask();
+ const uint32_t *Mask =
+ Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
@@ -3125,11 +3175,13 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SDValue TPWithOff_lo =
SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
- HiVar, DAG.getTargetConstant(0, MVT::i32)),
+ HiVar,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
0);
SDValue TPWithOff =
SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
- LoVar, DAG.getTargetConstant(0, MVT::i32)),
+ LoVar,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return TPWithOff;
} else if (Model == TLSModel::InitialExec) {
@@ -3165,10 +3217,10 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
- DAG.getTargetConstant(0, MVT::i32)),
+ DAG.getTargetConstant(0, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
+ DAG.getTargetConstant(0, DL, MVT::i32)),
0);
} else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
@@ -3211,7 +3263,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
- RHS = DAG.getConstant(0, LHS.getValueType());
+ RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
@@ -3236,10 +3288,10 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
if (CC == ISD::SETNE)
OFCC = getInvertedCondCode(OFCC);
- SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
+ SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
- return DAG.getNode(AArch64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest,
- CCVal, Overflow);
+ return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ Overflow);
}
if (LHS.getValueType().isInteger()) {
@@ -3261,7 +3313,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
- DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
+ DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
+ Dest);
}
return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
@@ -3276,7 +3329,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
- DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
+ DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
+ Dest);
}
return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
@@ -3286,7 +3340,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// becomes redundant. This would also increase register pressure.
uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
- DAG.getConstant(Mask, MVT::i64), Dest);
+ DAG.getConstant(Mask, dl, MVT::i64), Dest);
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
@@ -3296,7 +3350,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// becomes redundant. This would also increase register pressure.
uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
- DAG.getConstant(Mask, MVT::i64), Dest);
+ DAG.getConstant(Mask, dl, MVT::i64), Dest);
}
SDValue CCVal;
@@ -3312,11 +3366,11 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue BR1 =
DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
if (CC2 != AArch64CC::AL) {
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+ SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
Cmp);
}
@@ -3336,7 +3390,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
if (SrcVT == MVT::f32 && VT == MVT::f64)
In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
else if (SrcVT == MVT::f64 && VT == MVT::f32)
- In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0));
+ In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2,
+ DAG.getIntPtrConstant(0, DL));
else
// FIXME: Src type is different, bail out for now. Can VT really be a
// vector type?
@@ -3345,11 +3400,12 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
EVT VecVT;
EVT EltVT;
- SDValue EltMask, VecVal1, VecVal2;
+ uint64_t EltMask;
+ SDValue VecVal1, VecVal2;
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
EltVT = MVT::i32;
VecVT = MVT::v4i32;
- EltMask = DAG.getConstant(0x80000000ULL, EltVT);
+ EltMask = 0x80000000ULL;
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
@@ -3367,7 +3423,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
// We want to materialize a mask with the high bit set, but the AdvSIMD
// immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize zero and then negate it.
- EltMask = DAG.getConstant(0, EltVT);
+ EltMask = 0;
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
@@ -3382,11 +3438,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
llvm_unreachable("Invalid type for copysign!");
}
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
- BuildVectorOps.push_back(EltMask);
-
- SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps);
+ SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
// If we couldn't materialize the mask above, then the mask vector will be
// the zero vector, and we need to negate it here.
@@ -3408,8 +3460,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
}
SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
- if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::NoImplicitFloat))
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat))
return SDValue();
if (!Subtarget->hasNEON())
@@ -3426,21 +3478,15 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
SDValue Val = Op.getOperand(0);
SDLoc DL(Op);
EVT VT = Op.getValueType();
- SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8);
- SDValue VecVal;
- if (VT == MVT::i32) {
- VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
- VecVal = DAG.getTargetInsertSubreg(AArch64::ssub, DL, MVT::v8i8, ZeroVec,
- VecVal);
- } else {
- VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- }
+ if (VT == MVT::i32)
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal);
+ SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
- DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, MVT::i32), CtPop);
+ DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
if (VT == MVT::i64)
UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
@@ -3459,8 +3505,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// We chose ZeroOrOneBooleanContents, so use zero and one.
EVT VT = Op.getValueType();
- SDValue TVal = DAG.getConstant(1, VT);
- SDValue FVal = DAG.getConstant(0, VT);
+ SDValue TVal = DAG.getConstant(1, dl, VT);
+ SDValue FVal = DAG.getConstant(0, dl, VT);
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets picked up by the next if statement.
@@ -3497,7 +3543,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
changeFPCCToAArch64CC(CC, CC1, CC2);
if (CC2 == AArch64CC::AL) {
changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
@@ -3510,11 +3556,11 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// of the first as the RHS. We're effectively OR'ing the two CC's together.
// FIXME: It would be nice if we could match the two CSELs to two CSINCs.
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+ SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
}
@@ -3529,7 +3575,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
/// operations would *not* be semantically equivalent.
static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
if (Cmp == Result)
- return true;
+ return (Cmp.getValueType() == MVT::f32 ||
+ Cmp.getValueType() == MVT::f64);
ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
@@ -3544,49 +3591,10 @@ static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
}
-SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue CC = Op->getOperand(0);
- SDValue TVal = Op->getOperand(1);
- SDValue FVal = Op->getOperand(2);
- SDLoc DL(Op);
-
- unsigned Opc = CC.getOpcode();
- // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
- // instruction.
- if (CC.getResNo() == 1 &&
- (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
- // Only lower legal XALUO ops.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
- return SDValue();
-
- AArch64CC::CondCode OFCC;
- SDValue Value, Overflow;
- std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG);
- SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
-
- return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
- CCVal, Overflow);
- }
-
- if (CC.getOpcode() == ISD::SETCC)
- return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
- cast<CondCodeSDNode>(CC.getOperand(2))->get());
- else
- return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
- FVal, ISD::SETNE);
-}
-
-SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
+ SDValue RHS, SDValue TVal,
+ SDValue FVal, SDLoc dl,
SelectionDAG &DAG) const {
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue TVal = Op.getOperand(2);
- SDValue FVal = Op.getOperand(3);
- SDLoc dl(Op);
-
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
if (LHS.getValueType() == MVT::f128) {
@@ -3595,7 +3603,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
- RHS = DAG.getConstant(0, LHS.getValueType());
+ RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
@@ -3694,67 +3702,27 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
- EVT VT = Op.getValueType();
+ EVT VT = TVal.getValueType();
return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
assert(LHS.getValueType() == RHS.getValueType());
- EVT VT = Op.getValueType();
-
- // Try to match this select into a max/min operation, which have dedicated
- // opcode in the instruction set.
- // FIXME: This is not correct in the presence of NaNs, so we only enable this
- // in no-NaNs mode.
- if (getTargetMachine().Options.NoNaNsFPMath) {
- SDValue MinMaxLHS = TVal, MinMaxRHS = FVal;
- if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) &&
- selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) {
- CC = ISD::getSetCCSwappedOperands(CC);
- std::swap(MinMaxLHS, MinMaxRHS);
- }
-
- if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) &&
- selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) {
- switch (CC) {
- default:
- break;
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETOGT:
- case ISD::SETOGE:
- return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS);
- break;
- case ISD::SETLT:
- case ISD::SETLE:
- case ISD::SETULT:
- case ISD::SETULE:
- case ISD::SETOLT:
- case ISD::SETOLE:
- return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS);
- break;
- }
- }
- }
-
- // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
- // and do the comparison.
+ EVT VT = TVal.getValueType();
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two CSELs to implement.
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
// If we need a second CSEL, emit it, using the output of the first as the
// RHS. We're effectively OR'ing the two CC's together.
if (CC2 != AArch64CC::AL) {
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+ SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
@@ -3762,6 +3730,58 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
return CS1;
}
// Node-based entry point for SELECT_CC lowering. It only unpacks the operands
// of Op and forwards them to the operand-based LowerSELECT_CC overload:
//   operand 0 / 1 : LHS / RHS of the comparison
//   operand 2 / 3 : TVal / FVal (the values being selected between)
//   operand 4     : the condition code, read from a CondCodeSDNode
// The debug location passed on is the one attached to Op.
+SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TVal = Op.getOperand(2);
+ SDValue FVal = Op.getOperand(3);
+ SDLoc DL(Op);
+ return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
// Lowers a SELECT node. Operand 0 is the condition, operands 1 and 2 are the
// two values being selected between (TVal / FVal).
//
// Two paths:
//  1. The condition is result #1 of an {S,U}{ADD,SUB,MUL}O node: emit an
//     AArch64ISD::CSEL directly on the overflow value returned by
//     getAArch64XALUOOp. Returns an empty SDValue if the arithmetic result
//     type is not legal.
//  2. Anything else: rewrite the condition as (LHS, RHS, CC) and reuse the
//     operand-based LowerSELECT_CC overload.
+SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue CCVal = Op->getOperand(0);
+ SDValue TVal = Op->getOperand(1);
+ SDValue FVal = Op->getOperand(2);
+ SDLoc DL(Op);
+
+ unsigned Opc = CCVal.getOpcode();
+ // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
+ // instruction.
// getResNo() == 1 restricts this to uses of the node's second result.
+ if (CCVal.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+ // Only lower legal XALUO ops.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
+ return SDValue();
+
+ AArch64CC::CondCode OFCC;
+ SDValue Value, Overflow;
// Here CCVal still names the outer variable (the select condition).
+ std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
// NOTE: this declaration shadows the outer CCVal for the rest of this block;
// from here on CCVal is the i32 constant holding the condition code OFCC.
+ SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
+
+ return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
+ CCVal, Overflow);
+ }
+
+ // Lower it the same way as we would lower a SELECT_CC node.
+ ISD::CondCode CC;
+ SDValue LHS, RHS;
+ if (CCVal.getOpcode() == ISD::SETCC) {
// Condition is itself a comparison: reuse its operands and condition code.
+ LHS = CCVal.getOperand(0);
+ RHS = CCVal.getOperand(1);
+ CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
+ } else {
// Any other condition value is treated as "CCVal != 0".
+ LHS = CCVal;
+ RHS = DAG.getConstant(0, DL, CCVal.getValueType());
+ CC = ISD::SETNE;
+ }
+ return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries as PC relative offsets. No additional tweaking
@@ -3892,11 +3912,11 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SDValue GRTop, GRTopAddr;
GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(8, getPointerTy()));
+ DAG.getConstant(8, DL, getPointerTy()));
GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy());
GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
- DAG.getConstant(GPRSize, getPointerTy()));
+ DAG.getConstant(GPRSize, DL, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
MachinePointerInfo(SV, 8), false, false, 8));
@@ -3907,11 +3927,11 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(16, getPointerTy()));
+ DAG.getConstant(16, DL, getPointerTy()));
VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy());
VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
- DAG.getConstant(FPRSize, getPointerTy()));
+ DAG.getConstant(FPRSize, DL, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
MachinePointerInfo(SV, 16), false, false, 8));
@@ -3919,15 +3939,17 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
// int __gr_offs at offset 24
SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(24, getPointerTy()));
- MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
+ DAG.getConstant(24, DL, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL,
+ DAG.getConstant(-GPRSize, DL, MVT::i32),
GROffsAddr, MachinePointerInfo(SV, 24), false,
false, 4));
// int __vr_offs at offset 28
SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(28, getPointerTy()));
- MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
+ DAG.getConstant(28, DL, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL,
+ DAG.getConstant(-FPRSize, DL, MVT::i32),
VROffsAddr, MachinePointerInfo(SV, 28), false,
false, 4));
@@ -3944,13 +3966,15 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
SelectionDAG &DAG) const {
// AAPCS has three pointers and two ints (= 32 bytes), Darwin has single
// pointer.
+ SDLoc DL(Op);
unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1),
- Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32),
- 8, false, false, MachinePointerInfo(DestSV),
+ return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
+ Op.getOperand(2),
+ DAG.getConstant(VaListSize, DL, MVT::i32),
+ 8, false, false, false, MachinePointerInfo(DestSV),
MachinePointerInfo(SrcSV));
}
@@ -3972,9 +3996,9 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (Align > 8) {
assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(Align - 1, getPointerTy()));
+ DAG.getConstant(Align - 1, DL, getPointerTy()));
VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList,
- DAG.getConstant(-(int64_t)Align, getPointerTy()));
+ DAG.getConstant(-(int64_t)Align, DL, getPointerTy()));
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
@@ -3994,7 +4018,7 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// Increment the pointer, VAList, to the next vaarg
SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(ArgSize, getPointerTy()));
+ DAG.getConstant(ArgSize, DL, getPointerTy()));
// Store the incremented VAList to the legalized pointer
SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V),
false, false, 0);
@@ -4006,7 +4030,7 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(), false, false, false, 0);
// Round the value down to an f32.
SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, DL));
SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
// Merge the rounded value with the chain output of the load.
return DAG.getMergeValues(Ops, DL);
@@ -4055,7 +4079,7 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- SDValue Offset = DAG.getConstant(8, getPointerTy());
+ SDValue Offset = DAG.getConstant(8, DL, getPointerTy());
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
@@ -4083,15 +4107,15 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(VTBits, MVT::i64), ShAmt);
+ DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
- DAG.getConstant(VTBits, MVT::i64));
+ DAG.getConstant(VTBits, dl, MVT::i64));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
- SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64),
+ SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
ISD::SETGE, dl, DAG);
- SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32);
+ SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
@@ -4103,8 +4127,9 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue TrueValHi = Opc == ISD::SRA
? DAG.getNode(Opc, dl, VT, ShOpHi,
- DAG.getConstant(VTBits - 1, MVT::i64))
- : DAG.getConstant(0, VT);
+ DAG.getConstant(VTBits - 1, dl,
+ MVT::i64))
+ : DAG.getConstant(0, dl, VT);
SDValue Hi =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
@@ -4127,24 +4152,24 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(VTBits, MVT::i64), ShAmt);
+ DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
- DAG.getConstant(VTBits, MVT::i64));
+ DAG.getConstant(VTBits, dl, MVT::i64));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
- SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64),
+ SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
ISD::SETGE, dl, DAG);
- SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32);
+ SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
SDValue Hi =
DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
// AArch64 shifts of larger than register sizes are wrapped rather than
// clamped, so we can't just emit "lo << a" if a is too big.
- SDValue TrueValLo = DAG.getConstant(0, VT);
+ SDValue TrueValLo = DAG.getConstant(0, dl, VT);
SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Lo =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
@@ -4258,7 +4283,8 @@ AArch64TargetLowering::getSingleConstraintMatchWeight(
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
- const std::string &Constraint, MVT VT) const {
+ const TargetRegisterInfo *TRI, const std::string &Constraint,
+ MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
@@ -4287,7 +4313,7 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass *> Res;
- Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
@@ -4436,7 +4462,7 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint(
}
// All assembler immediates are 64-bit integers.
- Result = DAG.getTargetConstant(CVal, MVT::i64);
+ Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
break;
}
@@ -4462,7 +4488,7 @@ static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
SDLoc DL(V64Reg);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
- V64Reg, DAG.getConstant(0, MVT::i32));
+ V64Reg, DAG.getConstant(0, DL, MVT::i32));
}
/// getExtFactor - Determine the adjustment factor for the position when
@@ -4594,25 +4620,26 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
// The extraction can just take the second half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getConstant(NumSrcElts, MVT::i64));
+ DAG.getConstant(NumSrcElts, dl, MVT::i64));
Src.WindowBase = -NumSrcElts;
} else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, dl, MVT::i64));
} else {
// An actual VEXT is needed
SDValue VEXTSrc1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, dl, MVT::i64));
SDValue VEXTSrc2 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getConstant(NumSrcElts, MVT::i64));
+ DAG.getConstant(NumSrcElts, dl, MVT::i64));
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
- VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
+ VEXTSrc2,
+ DAG.getConstant(Imm, dl, MVT::i32));
Src.WindowBase = -Src.MinElt;
}
}
@@ -4947,11 +4974,11 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
VT.getVectorNumElements() / 2);
if (SplitV0) {
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
}
if (V1.getValueType().getSizeInBits() == 128) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
}
@@ -5018,7 +5045,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
unsigned Opcode;
if (EltTy == MVT::i8)
Opcode = AArch64ISD::DUPLANE8;
- else if (EltTy == MVT::i16)
+ else if (EltTy == MVT::i16 || EltTy == MVT::f16)
Opcode = AArch64ISD::DUPLANE16;
else if (EltTy == MVT::i32 || EltTy == MVT::f32)
Opcode = AArch64ISD::DUPLANE32;
@@ -5029,7 +5056,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
if (VT.getSizeInBits() == 64)
OpLHS = WidenVector(OpLHS, DAG);
- SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, MVT::i64);
+ SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
}
case OP_VEXT1:
@@ -5037,7 +5064,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
case OP_VEXT3: {
unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
- DAG.getConstant(Imm, MVT::i32));
+ DAG.getConstant(Imm, dl, MVT::i32));
}
case OP_VUZPL:
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
@@ -5074,7 +5101,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
for (int Val : ShuffleMask) {
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
unsigned Offset = Byte + Val * BytesPerElt;
- TBLMask.push_back(DAG.getConstant(Offset, MVT::i32));
+ TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
}
}
@@ -5094,7 +5121,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
- DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
@@ -5102,7 +5129,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
- DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
@@ -5114,7 +5141,8 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
// &TBLMask[0], IndexLen));
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
- DAG.getConstant(Intrinsic::aarch64_neon_tbl2, MVT::i32), V1Cst, V2Cst,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32),
+ V1Cst, V2Cst,
DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
makeArrayRef(TBLMask.data(), IndexLen)));
}
@@ -5183,7 +5211,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
} else if (VT.getSizeInBits() == 64)
V1 = WidenVector(V1, DAG);
- return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, MVT::i64));
+ return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64));
}
if (isREVMask(ShuffleMask, VT, 64))
@@ -5200,12 +5228,12 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
std::swap(V1, V2);
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
- DAG.getConstant(Imm, MVT::i32));
+ DAG.getConstant(Imm, dl, MVT::i32));
} else if (V2->getOpcode() == ISD::UNDEF &&
isSingletonEXTMask(ShuffleMask, VT, Imm)) {
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
- DAG.getConstant(Imm, MVT::i32));
+ DAG.getConstant(Imm, dl, MVT::i32));
}
unsigned WhichResult;
@@ -5244,7 +5272,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
int NumInputElements = V1.getValueType().getVectorNumElements();
if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
SDValue DstVec = DstIsLeft ? V1 : V2;
- SDValue DstLaneV = DAG.getConstant(Anomaly, MVT::i64);
+ SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
SDValue SrcVec = V1;
int SrcLane = ShuffleMask[Anomaly];
@@ -5252,7 +5280,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SrcVec = V2;
SrcLane -= VT.getVectorNumElements();
}
- SDValue SrcLaneV = DAG.getConstant(SrcLane, MVT::i64);
+ SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
EVT ScalarVT = VT.getVectorElementType();
@@ -5342,8 +5370,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5351,8 +5379,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5360,8 +5388,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(16, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5369,8 +5397,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(24, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5378,8 +5406,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5387,8 +5415,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -5492,7 +5520,8 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
SDValue ResultSLI =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1));
+ DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
+ Shift.getOperand(1));
DEBUG(dbgs() << "aarch64-lower: transformed: \n");
DEBUG(N->dump(&DAG));
@@ -5542,8 +5571,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5551,8 +5580,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5560,8 +5589,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(16, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5569,8 +5598,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(24, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5578,8 +5607,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5587,8 +5616,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -5623,7 +5652,7 @@ static SDValue NormalizeBuildVector(SDValue Op,
if (Lane.getOpcode() == ISD::Constant) {
APInt LowBits(EltTy.getSizeInBits(),
cast<ConstantSDNode>(Lane)->getZExtValue());
- Lane = DAG.getConstant(LowBits.getZExtValue(), MVT::i32);
+ Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
}
Ops.push_back(Lane);
}
@@ -5661,13 +5690,13 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal);
if (VT.getSizeInBits() == 128) {
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
// Support the V64 version via subregister insertion.
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5675,8 +5704,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5684,8 +5713,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5693,8 +5722,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(16, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5702,8 +5731,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(24, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5711,8 +5740,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5720,8 +5749,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5729,8 +5758,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(264, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(264, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5738,8 +5767,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(272, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(272, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5747,7 +5776,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5756,7 +5785,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5764,7 +5793,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
VT.getSizeInBits() == 128) {
CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5774,8 +5803,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5783,8 +5812,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5792,8 +5821,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(16, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5801,8 +5830,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(24, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5810,8 +5839,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5819,8 +5848,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5828,8 +5857,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(264, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(264, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5837,8 +5866,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(272, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(272, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -5921,8 +5950,10 @@ FailedModImm:
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
- MVT NewType =
- (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ EVT EltTy = VT.getVectorElementType();
+ assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
+ "Unsupported floating-point vector type");
+ MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
@@ -5942,7 +5973,7 @@ FailedModImm:
// Now insert the non-constant lanes.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
- SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
+ SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
// Note that type legalization likely mucked about with the VT of the
// source operand, so we may have to convert it here before inserting.
@@ -5984,7 +6015,7 @@ FailedModImm:
unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
MachineSDNode *N =
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
- DAG.getTargetConstant(SubIdx, MVT::i32));
+ DAG.getTargetConstant(SubIdx, dl, MVT::i32));
Vec = SDValue(N, 0);
++i;
}
@@ -5992,7 +6023,7 @@ FailedModImm:
SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF)
continue;
- SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
+ SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
}
return Vec;
@@ -6215,10 +6246,11 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
case ISD::SHL:
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
- return DAG.getNode(AArch64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0),
- DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Cnt, DL, MVT::i32));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::aarch64_neon_ushl, MVT::i32),
+ DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
+ MVT::i32),
Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
@@ -6227,8 +6259,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
Cnt < EltSize) {
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
- return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0),
- DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Cnt, DL, MVT::i32));
}
// Right shift register. Note, there is not a shift right register
@@ -6240,7 +6272,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
SDValue NegShiftLeft =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift);
+ DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
+ NegShift);
return NegShiftLeft;
}
@@ -6521,6 +6554,34 @@ bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return NumBits1 > NumBits2;
}
+/// Check if it is profitable to hoist instruction I from then/else into if.
+/// Hoisting is not profitable when I is an FMul whose single user is an
+/// FAdd/FSub that can be fused with it into an FMA (we prefer FMADD/FMSUB).
+bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
+ if (I->getOpcode() != Instruction::FMul)
+ return true;
+
+ if (I->getNumUses() != 1)
+ return true;
+
+ Instruction *User = I->user_back();
+
+ if (User && // NOTE(review): User is dereferenced unguarded below; confirm user_back() cannot return null.
+ !(User->getOpcode() == Instruction::FSub ||
+ User->getOpcode() == Instruction::FAdd))
+ return true;
+
+ const TargetOptions &Options = getTargetMachine().Options;
+ EVT VT = getValueType(User->getOperand(0)->getType());
+
+ if (isFMAFasterThanFMulAndFAdd(VT) &&
+ isOperationLegalOrCustom(ISD::FMA, VT) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath))
+ return false; // Leave the FMul next to its FAdd/FSub user so they can fuse.
+
+ return true;
+}
+
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
// 64-bit GPR.
bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
@@ -6553,6 +6614,59 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
VT1.getSizeInBits() <= 32);
}
+bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { // True only if every use of Ext passes the checks below.
+ if (isa<FPExtInst>(Ext))
+ return false;
+
+ // Vector types are not free.
+ if (Ext->getType()->isVectorTy())
+ return false;
+
+ for (const Use &U : Ext->uses()) {
+ // The extension is free if we can fold it with a left shift in an
+ // addressing mode or an arithmetic operation: add, sub, and cmp.
+
+ // Inspect the instruction that uses the extension.
+ const Instruction *Instr = cast<Instruction>(U.getUser());
+
+ // Classify the user: constant shift, GEP index, no-op trunc, or other.
+ switch (Instr->getOpcode()) {
+ case Instruction::Shl:
+ if (!isa<ConstantInt>(Instr->getOperand(1)))
+ return false;
+ break;
+ case Instruction::GetElementPtr: {
+ gep_type_iterator GTI = gep_type_begin(Instr);
+ std::advance(GTI, U.getOperandNo());
+ Type *IdxTy = *GTI;
+ // This extension will end up with a shift because of the scaling factor.
+ // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
+ // Get the shift amount based on the scaling factor:
+ // log2(store size of IdxTy in bits) - log2(8).
+ uint64_t ShiftAmt =
+ countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3;
+ // Is the constant foldable in the shift of the addressing mode?
+ // I.e., shift amount is between 1 and 4 inclusive.
+ if (ShiftAmt == 0 || ShiftAmt > 4)
+ return false;
+ break;
+ }
+ case Instruction::Trunc:
+ // Check if this is a noop, i.e. the trunc restores the pre-extension type:
+ // trunc(sext ty1 to ty2) to ty1.
+ if (Instr->getType() == Ext->getOperand(0)->getType())
+ continue;
+ // FALL THROUGH.
+ default:
+ return false;
+ }
+
+ // At this point we can use the bfm family, so this extension is free
+ // for that use.
+ }
+ return true;
+}
+
bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
unsigned &RequiredAligment) const {
if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
@@ -6591,13 +6705,22 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
bool Fast;
const Function *F = MF.getFunction();
if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat) &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
(allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
return MVT::f128;
- return Size >= 8 ? MVT::i64 : MVT::i32;
+ if (Size >= 8 &&
+ (memOpAlign(SrcAlign, DstAlign, 8) ||
+ (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
+ return MVT::i64;
+
+ if (Size >= 4 &&
+ (memOpAlign(SrcAlign, DstAlign, 4) ||
+ (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
+ return MVT::i32;
+
+ return MVT::Other;
}
// 12-bit optionally shifted immediates are legal for adds.
@@ -6748,7 +6871,7 @@ bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
unsigned LZ = countLeadingZeros((uint64_t)Val);
unsigned Shift = (63 - LZ) / 16;
// MOVZ is free so return true for one or fewer MOVK.
- return (Shift < 3) ? true : false;
+ return Shift < 3;
}
// Generate SUBS and CSEL for integer abs.
@@ -6766,14 +6889,14 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
- SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
N0.getOperand(0));
// Generate SUBS & CSEL.
SDValue Cmp =
DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
- N0.getOperand(0), DAG.getConstant(0, VT));
+ N0.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
- DAG.getConstant(AArch64CC::PL, MVT::i32),
+ DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
SDValue(Cmp.getNode(), 1));
}
return SDValue();
@@ -6802,8 +6925,8 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
unsigned Lg2 = Divisor.countTrailingZeros();
- SDValue Zero = DAG.getConstant(0, VT);
- SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
// Add (N0 < 0) ? Pow2 - 1 : 0;
SDValue CCVal;
@@ -6819,7 +6942,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
// Divide by pow2.
SDValue SRA =
- DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, MVT::i64));
+ DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
@@ -6828,7 +6951,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (Created)
Created->push_back(SRA.getNode());
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), SRA);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
@@ -6845,45 +6968,46 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
APInt Value = C->getAPIntValue();
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
if (Value.isNonNegative()) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
APInt VM1 = Value - 1;
if (VM1.isPowerOf2()) {
SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VM1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal,
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(VM1.logBase2(), DL, MVT::i64));
+ return DAG.getNode(ISD::ADD, DL, VT, ShiftedVal,
N->getOperand(0));
}
// (mul x, 2^N - 1) => (sub (shl x, N), x)
APInt VP1 = Value + 1;
if (VP1.isPowerOf2()) {
SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal,
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(VP1.logBase2(), DL, MVT::i64));
+ return DAG.getNode(ISD::SUB, DL, VT, ShiftedVal,
N->getOperand(0));
}
} else {
- // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
- APInt VNM1 = -Value - 1;
- if (VNM1.isPowerOf2()) {
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VNM1.logBase2(), MVT::i64));
- SDValue Add =
- DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add);
- }
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
APInt VNP1 = -Value + 1;
if (VNP1.isPowerOf2()) {
SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VNP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(VNP1.logBase2(), DL, MVT::i64));
+ return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0),
ShiftedVal);
}
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ APInt VNM1 = -Value - 1;
+ if (VNM1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(VNM1.logBase2(), DL, MVT::i64));
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, ShiftedVal, N->getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Add);
+ }
}
}
return SDValue();
@@ -7037,7 +7161,7 @@ static SDValue tryCombineToEXTR(SDNode *N,
}
return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
- DAG.getConstant(ShiftRHS, MVT::i64));
+ DAG.getConstant(ShiftRHS, DL, MVT::i64));
}
static SDValue tryCombineToBSL(SDNode *N,
@@ -7165,10 +7289,10 @@ static SDValue performBitcastCombine(SDNode *N,
SDLoc dl(N);
unsigned NumElements = VT.getVectorNumElements();
if (idx) {
- SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64);
+ SDValue HalfIdx = DAG.getConstant(NumElements, dl, MVT::i64);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx);
} else {
- SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, MVT::i32);
+ SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, dl, MVT::i32);
return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
Source, SubReg),
0);
@@ -7178,22 +7302,55 @@ static SDValue performBitcastCombine(SDNode *N,
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+ // Optimize concat_vectors of truncated vectors, where the intermediate
+ // type is illegal, to avoid said illegality, e.g.,
+ // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
+ // (v2i16 (truncate (v2i64)))))
+ // ->
+ // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
+ // (v4i32 (bitcast (v2i64))),
+ // <0, 2, 4, 6>)))
+ // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
+ // on both input and result type, so we might generate worse code.
+ // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
+ if (N->getNumOperands() == 2 &&
+ N0->getOpcode() == ISD::TRUNCATE &&
+ N1->getOpcode() == ISD::TRUNCATE) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N10 = N1->getOperand(0);
+ EVT N00VT = N00.getValueType();
+
+ if (N00VT == N10.getValueType() &&
+ (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
+ N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
+ MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
+ SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
+ for (size_t i = 0; i < Mask.size(); ++i)
+ Mask[i] = i * 2;
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getVectorShuffle(
+ MidVT, dl,
+ DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
+ DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
+ }
+ }
+
// Wait 'til after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
- if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) {
+ if (N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getVectorElementType().getSizeInBits() == 64);
- return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT,
- WidenVector(N->getOperand(0), DAG),
- DAG.getConstant(0, MVT::i64));
+ return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
+ DAG.getConstant(0, dl, MVT::i64));
}
// Canonicalise concat_vectors so that the right-hand vector has as few
@@ -7205,10 +7362,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
- SDValue Op1 = N->getOperand(1);
- if (Op1->getOpcode() != ISD::BITCAST)
+ if (N1->getOpcode() != ISD::BITCAST)
return SDValue();
- SDValue RHS = Op1->getOperand(0);
+ SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
// If the RHS is not a vector, this is not the pattern we're looking for.
if (!RHSTy.isVector())
@@ -7218,10 +7374,10 @@ static SDValue performConcatVectorsCombine(SDNode *N,
MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
RHSTy.getVectorNumElements() * 2);
- return DAG.getNode(
- ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
- DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS));
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
+ DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
+ RHS));
}
static SDValue tryCombineFixedPointConvert(SDNode *N,
@@ -7310,15 +7466,16 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
unsigned NumElems = NarrowTy.getVectorNumElements();
MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2);
+ SDLoc dl(N);
SDValue NewDUP;
if (IsDUPLANE)
- NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0),
+ NewDUP = DAG.getNode(N.getOpcode(), dl, NewDUPVT, N.getOperand(0),
N.getOperand(1));
else
- NewDUP = DAG.getNode(AArch64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0));
+ NewDUP = DAG.getNode(AArch64ISD::DUP, dl, NewDUPVT, N.getOperand(0));
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy,
- NewDUP, DAG.getConstant(NumElems, MVT::i64));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy, NewDUP,
+ DAG.getConstant(NumElems, dl, MVT::i64));
}
static bool isEssentiallyExtractSubvector(SDValue N) {
@@ -7443,7 +7600,8 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
SDLoc dl(Op);
if (InfoAndKind.IsAArch64) {
CCVal = DAG.getConstant(
- AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), MVT::i32);
+ AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
+ MVT::i32);
Cmp = *InfoAndKind.Info.AArch64.Cmp;
} else
Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0,
@@ -7452,7 +7610,7 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
CCVal, DAG, dl);
EVT VT = Op->getValueType(0);
- LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT));
+ LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}
@@ -7592,12 +7750,15 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
break;
}
- if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits)
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
- DAG.getConstant(-ShiftAmount, MVT::i32));
- else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits)
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
- DAG.getConstant(ShiftAmount, MVT::i32));
+ if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
+ SDLoc dl(N);
+ return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
+ DAG.getConstant(-ShiftAmount, dl, MVT::i32));
+ } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
+ SDLoc dl(N);
+ return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
+ DAG.getConstant(ShiftAmount, dl, MVT::i32));
+ }
return SDValue();
}
@@ -7618,6 +7779,16 @@ static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
}
+static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
+ SelectionDAG &DAG) {
+ SDLoc dl(N);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
+ DAG.getNode(Opc, dl,
+ N->getOperand(1).getSimpleValueType(),
+ N->getOperand(1)),
+ DAG.getConstant(0, dl, MVT::i64));
+}
+
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -7630,6 +7801,18 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
break;
+ case Intrinsic::aarch64_neon_saddv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
+ case Intrinsic::aarch64_neon_uaddv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
+ case Intrinsic::aarch64_neon_sminv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
+ case Intrinsic::aarch64_neon_uminv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
+ case Intrinsic::aarch64_neon_smaxv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
+ case Intrinsic::aarch64_neon_umaxv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
@@ -7744,9 +7927,9 @@ static SDValue performExtendCombine(SDNode *N,
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getConstant(InNVT.getVectorNumElements(), MVT::i64));
+ DAG.getConstant(InNVT.getVectorNumElements(), DL, MVT::i64));
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
@@ -7806,7 +7989,7 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
unsigned Offset = EltOffset;
while (--NumVecElts) {
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
- DAG.getConstant(Offset, MVT::i64));
+ DAG.getConstant(Offset, DL, MVT::i64));
NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), Alignment);
@@ -7827,14 +8010,13 @@ static SDValue performSTORECombine(SDNode *N,
return SDValue();
// Cyclone has bad performance on unaligned 16B stores when crossing line and
- // page boundries. We want to split such stores.
+ // page boundaries. We want to split such stores.
if (!Subtarget->isCyclone())
return SDValue();
// Don't split at Oz.
MachineFunction &MF = DAG.getMachineFunction();
- bool IsMinSize = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize);
+ bool IsMinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
if (IsMinSize)
return SDValue();
@@ -7868,15 +8050,15 @@ static SDValue performSTORECombine(SDNode *N,
EVT HalfVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
- DAG.getConstant(NumElts, MVT::i64));
+ DAG.getConstant(NumElts, DL, MVT::i64));
SDValue BasePtr = S->getBasePtr();
SDValue NewST1 =
DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
S->isVolatile(), S->isNonTemporal(), S->getAlignment());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
- DAG.getConstant(8, MVT::i64));
+ DAG.getConstant(8, DL, MVT::i64));
return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(),
S->getAlignment());
@@ -7944,6 +8126,13 @@ static SDValue performPostLD1Combine(SDNode *N,
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
+ // Finally, check that the vector doesn't depend on the load.
+ // Again, this would create a cycle.
+ // The load depending on the vector is fine, as that's the case for the
+ // LD1*post we'll eventually generate anyway.
+ if (LoadSDN->isPredecessorOf(Vector.getNode()))
+ continue;
+
SmallVector<SDValue, 8> Ops;
Ops.push_back(LD->getOperand(0)); // Chain
if (IsLaneOp) {
@@ -7961,7 +8150,7 @@ static SDValue performPostLD1Combine(SDNode *N,
LoadSDN->getMemOperand());
// Update the uses.
- std::vector<SDValue> NewResults;
+ SmallVector<SDValue, 2> NewResults;
NewResults.push_back(SDValue(LD, 0)); // The result of load
NewResults.push_back(SDValue(UpdN.getNode(), 2)); // Chain
DCI.CombineTo(LD, NewResults);
@@ -8455,13 +8644,21 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
/// the compare-mask instructions rather than going via NZCV, even if LHS and
/// RHS are really scalar. This replaces any scalar setcc in the above pattern
/// with a vector one followed by a DUP shuffle on the result.
-static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performSelectCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
EVT ResVT = N->getValueType(0);
- if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
+ if (N0.getOpcode() != ISD::SETCC)
return SDValue();
+ // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
+ // scalar SetCCResultType. We also don't expect vectors, because we assume
+ // that selects fed by vector SETCCs are canonicalized to VSELECT.
+ assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
+ "Scalar-SETCC feeding SELECT has unexpected result type!");
+
// If NumMaskElts == 0, the comparison is larger than select result. The
// largest real NEON comparison is 64-bits per lane, which means the result is
// at most 32-bits and an illegal vector. Just bail out for now.
@@ -8479,6 +8676,16 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
+ // Also bail out if the vector CCVT isn't the same size as ResVT.
+ // This can happen if the SETCC operand size doesn't divide the ResVT size
+ // (e.g., f64 vs v3f32).
+ if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
+ return SDValue();
+
+ // Make sure we didn't create illegal types, if we're not supposed to.
+ assert(DCI.isBeforeLegalize() ||
+ DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
+
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
SDLoc DL(N0);
@@ -8497,6 +8704,75 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
+/// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match FMIN/FMAX patterns.
+static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
+ // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y".
+ // Unless the NoNaNsFPMath option is set, be careful about NaNs:
+ // FMAX/FMIN return NaN if either operand is a NaN;
+ // only do the transformation when it matches that behavior.
+
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode;
+ bool IsReversed;
+ if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
+ selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
+ selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
+ IsReversed = true ; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+
+ bool IsUnordered = false, IsOrEqual;
+ switch (CC) {
+ default:
+ return SDValue();
+ case ISD::SETULT:
+ case ISD::SETULE:
+ IsUnordered = true;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE);
+ Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
+ break;
+
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ IsUnordered = true;
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE);
+ Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
+ break;
+ }
+
+ // If LHS is NaN, an ordered comparison will be false and the result will be
+ // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking
+ // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ return SDValue();
+
+ // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true,
+ // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be
+ // used for unsafe math or if one of the operands is known to be nonzero.
+ if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ return SDValue();
+
+ return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -8526,9 +8802,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
case ISD::SELECT:
- return performSelectCombine(N, DAG);
+ return performSelectCombine(N, DCI);
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
+ case ISD::SELECT_CC:
+ return performSelectCCCombine(N, DCI.DAG);
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
case AArch64ISD::BRCOND:
@@ -8699,7 +8977,7 @@ static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
Op = SDValue(
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
DAG.getUNDEF(MVT::i32), Op,
- DAG.getTargetConstant(AArch64::hsub, MVT::i32)),
+ DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
@@ -8760,9 +9038,11 @@ bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
-bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= 128;
+ return Size <= 128 ? AtomicRMWExpansionKind::LLSC
+ : AtomicRMWExpansionKind::None;
}
bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
@@ -8822,7 +9102,7 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
- return Builder.CreateCall3(Stxr, Lo, Hi, Addr);
+ return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
}
Intrinsic::ID Int =
@@ -8830,10 +9110,10 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Type *Tys[] = { Addr->getType() };
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
- return Builder.CreateCall2(
- Stxr, Builder.CreateZExtOrBitCast(
- Val, Stxr->getFunctionType()->getParamType(0)),
- Addr);
+ return Builder.CreateCall(Stxr,
+ {Builder.CreateZExtOrBitCast(
+ Val, Stxr->getFunctionType()->getParamType(0)),
+ Addr});
}
bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 5a19322..0d9b8b7 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -18,13 +18,14 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace AArch64ISD {
-enum {
+enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
CALL, // Function call.
@@ -140,6 +141,18 @@ enum {
FCMLEz,
FCMLTz,
+ // Vector across-lanes addition
+ // Only the lower result lane is defined.
+ SADDV,
+ UADDV,
+
+ // Vector across-lanes min/max
+ // Only the lower result lane is defined.
+ SMINV,
+ UMINV,
+ SMAXV,
+ UMAXV,
+
// Vector bitwise negation
NOT,
@@ -207,7 +220,8 @@ class AArch64TargetLowering : public TargetLowering {
bool RequireStrictAlign;
public:
- explicit AArch64TargetLowering(const TargetMachine &TM);
+ explicit AArch64TargetLowering(const TargetMachine &TM,
+ const AArch64Subtarget &STI);
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
@@ -222,7 +236,7 @@ public:
MVT getScalarShiftAmountTy(EVT LHSTy) const override;
/// allowsMisalignedMemoryAccesses - Returns true if the target allows
- /// unaligned memory accesses. of the specified type.
+ /// unaligned memory accesses of the specified type.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
unsigned Align = 1,
bool *Fast = nullptr) const override {
@@ -244,10 +258,6 @@ public:
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned getFunctionAlignment(const Function *F) const;
- /// getMaximalGlobalOffset - Returns the maximal possible offset which can
- /// be used for loads / stores from the global.
- unsigned getMaximalGlobalOffset() const override;
-
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
// Addrspacecasts are always noops.
@@ -285,6 +295,8 @@ public:
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
bool isTruncateFree(EVT VT1, EVT VT2) const override;
+ bool isProfitableToHoist(Instruction *I) const override;
+
bool isZExtFree(Type *Ty1, Type *Ty2) const override;
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
@@ -335,13 +347,16 @@ public:
bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicRMWExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
private:
+ bool isExtFreeImpl(const Instruction *Ext) const override;
+
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget;
@@ -405,6 +420,9 @@ private:
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
+ SDValue TVal, SDValue FVal, SDLoc dl,
+ SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -453,12 +471,23 @@ private:
const char *constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
- getRegForInlineAsmConstraint(const std::string &Constraint,
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const override;
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
+ // followed by llvm_unreachable so we'll leave them unimplemented in
+ // the backend for now.
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index d295c02..3b8b668 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -441,11 +441,11 @@ def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
// instructions for splatting repeating bit patterns across the immediate.
def logical_imm32_XFORM : SDNodeXForm<imm, [{
uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 32);
- return CurDAG->getTargetConstant(enc, MVT::i32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>;
def logical_imm64_XFORM : SDNodeXForm<imm, [{
uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 64);
- return CurDAG->getTargetConstant(enc, MVT::i32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>;
let DiagnosticType = "LogicalSecondSource" in {
@@ -682,7 +682,7 @@ def fpimm32 : Operand<f32>,
}], SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = AArch64_AM::getFP32Imm(InVal);
- return CurDAG->getTargetConstant(enc, MVT::i32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>> {
let ParserMatchClass = FPImmOperand;
let PrintMethod = "printFPImmOperand";
@@ -693,7 +693,7 @@ def fpimm64 : Operand<f64>,
}], SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = AArch64_AM::getFP64Imm(InVal);
- return CurDAG->getTargetConstant(enc, MVT::i32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>> {
let ParserMatchClass = FPImmOperand;
let PrintMethod = "printFPImmOperand";
@@ -768,7 +768,7 @@ def simdimmtype10 : Operand<i32>,
uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF()
.bitcastToAPInt()
.getZExtValue());
- return CurDAG->getTargetConstant(enc, MVT::i32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>> {
let ParserMatchClass = SIMDImmType10Operand;
let PrintMethod = "printSIMDType10Operand";
@@ -1637,10 +1637,16 @@ multiclass AddSub<bit isSub, string mnemonic,
SDPatternOperator OpNode = null_frag> {
let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// Add/Subtract immediate
+ // Increase the weight of the immediate variant to try to match it before
+ // the extended register variant.
+ // We used to match the register variant before the immediate when the
+ // register argument could be implicitly zero-extended.
+ let AddedComplexity = 6 in
def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
mnemonic, OpNode> {
let Inst{31} = 0;
}
+ let AddedComplexity = 6 in
def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
mnemonic, OpNode> {
let Inst{31} = 1;
@@ -2186,7 +2192,8 @@ class BaseCondSelectOp<bit op, bits<2> op2, RegisterClass regtype, string asm,
def inv_cond_XFORM : SDNodeXForm<imm, [{
AArch64CC::CondCode CC = static_cast<AArch64CC::CondCode>(N->getZExtValue());
- return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), MVT::i32);
+ return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), SDLoc(N),
+ MVT::i32);
}]>;
multiclass CondSelectOp<bit op, bits<2> op2, string asm, PatFrag frag> {
@@ -3282,6 +3289,10 @@ class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0,
: BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> {
bits<5> Rt;
bits<5> Rn;
+ let Inst{20-16} = 0b11111;
+ let Unpredictable{20-16} = 0b11111;
+ let Inst{14-10} = 0b11111;
+ let Unpredictable{14-10} = 0b11111;
let Inst{9-5} = Rn;
let Inst{4-0} = Rt;
@@ -5298,6 +5309,27 @@ class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
+ dag oops, dag iops, string asm,
+ list<dag> pattern>
+ : I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21} = R;
+ let Inst{20-16} = Rm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
@@ -5325,6 +5357,16 @@ multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
}
+multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst),
+ (ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm),
+ asm, []>;
+ def v1i16: BaseSIMDThreeScalarTied<U, 0b01, R, opc, (outs FPR16:$dst),
+ (ins FPR16:$Rd, FPR16:$Rn, FPR16:$Rm),
+ asm, []>;
+}
+
multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
@@ -5885,7 +5927,7 @@ multiclass SIMDIns {
let Inst{20-18} = idx;
let Inst{17-16} = 0b10;
let Inst{14-12} = idx2;
- let Inst{11} = 0;
+ let Inst{11} = {?};
}
def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> {
bits<2> idx;
@@ -5893,7 +5935,7 @@ multiclass SIMDIns {
let Inst{20-19} = idx;
let Inst{18-16} = 0b100;
let Inst{14-13} = idx2;
- let Inst{12-11} = 0;
+ let Inst{12-11} = {?,?};
}
def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> {
bits<1> idx;
@@ -5901,7 +5943,7 @@ multiclass SIMDIns {
let Inst{20} = idx;
let Inst{19-16} = 0b1000;
let Inst{14} = idx2;
- let Inst{13-11} = 0;
+ let Inst{13-11} = {?,?,?};
}
// For all forms of the INS instruction, the "mov" mnemonic is the
@@ -8517,6 +8559,174 @@ multiclass SIMDLdSt4SingleAliases<string asm> {
} // end of 'let Predicates = [HasNEON]'
//----------------------------------------------------------------------------
+// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract
+//----------------------------------------------------------------------------
+
+let Predicates = [HasNEON, HasV8_1a] in {
+
+class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand regtype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind,
+ pattern> {
+ let Inst{21}=0;
+}
+multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h",
+ [(set (v4i16 V64:$dst),
+ (Accum (v4i16 V64:$Rd),
+ (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn),
+ (v4i16 V64:$Rm)))))]>;
+ def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h",
+ [(set (v8i16 V128:$dst),
+ (Accum (v8i16 V128:$Rd),
+ (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn),
+ (v8i16 V128:$Rm)))))]>;
+ def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s",
+ [(set (v2i32 V64:$dst),
+ (Accum (v2i32 V64:$Rd),
+ (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn),
+ (v2i32 V64:$Rm)))))]>;
+ def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn),
+ (v4i32 V128:$Rm)))))]>;
+}
+
+multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
+ V64, V64, V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4i16 V64:$dst),
+ (Accum (v4i16 V64:$Rd),
+ (v4i16 (int_aarch64_neon_sqrdmulh
+ (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
+ V128, V128, V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8i16 V128:$dst),
+ (Accum (v8i16 V128:$Rd),
+ (v8i16 (int_aarch64_neon_sqrdmulh
+ (v8i16 V128:$Rn),
+ (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
+ V64, V64, V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2i32 V64:$dst),
+ (Accum (v2i32 V64:$Rd),
+ (v2i32 (int_aarch64_neon_sqrdmulh
+ (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ // FIXME: it would be nice to use the scalar (v1i32) instruction here, but
+ // an intermediate EXTRACT_SUBREG would be untyped.
+ // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal; that is why it
+ // is lowered here as (i32 vector_extract (v4i32 insert_subvector(..))).
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
+ (v4i32 (insert_subvector
+ (undef),
+ (v2i32 (int_aarch64_neon_sqrdmulh
+ (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32
+ (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))),
+ (i32 0))),
+ (i64 0))))),
+ (EXTRACT_SUBREG
+ (v2i32 (!cast<Instruction>(NAME # v2i32_indexed)
+ (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ FPR32Op:$Rd,
+ ssub)),
+ V64:$Rn,
+ V128:$Rm,
+ VectorIndexS:$idx)),
+ ssub)>;
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128, V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqrdmulh
+ (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ // FIXME: it would be nice to use the scalar (v1i32) instruction here, but
+ // an intermediate EXTRACT_SUBREG would be untyped.
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
+ (v4i32 (int_aarch64_neon_sqrdmulh
+ (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32
+ (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))),
+ (i64 0))))),
+ (EXTRACT_SUBREG
+ (v4i32 (!cast<Instruction>(NAME # v4i32_indexed)
+ (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ FPR32Op:$Rd,
+ ssub)),
+ V128:$Rn,
+ V128:$Rm,
+ VectorIndexS:$idx)),
+ ssub)>;
+
+ def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
+ FPR16Op, FPR16Op, V128_lo,
+ VectorIndexH, asm, ".h", "", "", ".h",
+ []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
+ FPR32Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s",
+ [(set (i32 FPR32Op:$dst),
+ (Accum (i32 FPR32Op:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh
+ (i32 FPR32Op:$Rn),
+ (i32 (vector_extract (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+} // let Predicates = [HasNEON, HasV8_1a]
+
+//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index e582ed4..207c34c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
- RI(this, &STI), Subtarget(STI) {}
+ RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
@@ -617,10 +617,8 @@ AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
int OffsetA = 0, OffsetB = 0;
int WidthA = 0, WidthB = 0;
- assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
- "MIa must be a store or a load");
- assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
- "MIb must be a store or a load");
+ assert(MIa && MIa->mayLoadOrStore() && "MIa must be a load or store.");
+ assert(MIb && MIb->mayLoadOrStore() && "MIb must be a load or store.");
if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
@@ -707,9 +705,8 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) {
assert(MBB && "Can't get MachineBasicBlock here");
MachineFunction *MF = MBB->getParent();
assert(MF && "Can't get MachineFunction here");
- const TargetMachine *TM = &MF->getTarget();
- const TargetInstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
- const TargetRegisterInfo *TRI = TM->getSubtargetImpl()->getRegisterInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
@@ -1527,7 +1524,7 @@ void AArch64InstrInfo::copyPhysRegTuple(
}
for (; SubReg != End; SubReg += Incr) {
- const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode));
+ const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
@@ -1905,7 +1902,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI);
@@ -2003,7 +2000,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
.addReg(DestReg, getDefRegState(true))
.addFrameIndex(FI);
if (Offset)
@@ -2069,10 +2066,10 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
.setMIFlag(Flag);
}
-MachineInstr *
-AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// This is a bit of a hack. Consider this instruction:
//
// %vreg0<def> = COPY %SP; GPR64all:%vreg0
@@ -2367,7 +2364,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(AArch64::HINT);
- NopInst.addOperand(MCOperand::CreateImm(0));
+ NopInst.addOperand(MCOperand::createImm(0));
}
/// useMachineCombiner - return true when a target supports MachineCombiner
bool AArch64InstrInfo::useMachineCombiner() const {
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index d8f1274..fa4b8b7 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -129,10 +129,9 @@ public:
const TargetRegisterInfo *TRI) const override;
using TargetInstrInfo::foldMemoryOperandImpl;
- MachineInstr *
- foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const override;
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index a6f09e9..c7d6a69 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -14,6 +14,8 @@
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
+def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
+ AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
@@ -258,6 +260,13 @@ def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
+def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
+def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
+def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
+def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
+def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
+def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -489,7 +498,7 @@ def i64imm_32bit : ImmLeaf<i64, [{
}]>;
def trunc_imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
def : Pat<(i64 i64imm_32bit:$src),
@@ -498,12 +507,12 @@ def : Pat<(i64 i64imm_32bit:$src),
// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
- N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i32);
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
- N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i64);
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;
@@ -848,57 +857,57 @@ defm UBFM : BitfieldImm<0b10, "ubfm">;
def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
enc = enc > 7 ? 7 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
enc = enc > 15 ? 15 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 7 ? 7 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 15 ? 15 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 31 ? 31 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
@@ -2305,6 +2314,20 @@ def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
+let Predicates = [HasV8_1a] in {
+ // v8.1a "Limited Order Region" extension load-acquire instructions
+ def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
+ def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
+ def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
+ def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
+
+ // v8.1a "Limited Order Region" extension store-release instructions
+ def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">;
+ def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">;
+ def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
+ def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
+}
+
//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//
@@ -2341,8 +2364,15 @@ defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
defm FMOV : UnscaledConversion<"fmov">;
-def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>;
-def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
+// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
+let isReMaterializable = 1, isCodeGenOnly = 1 in {
+def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
+ PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>,
+ Requires<[NoZCZ]>;
+def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
+ PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>,
+ Requires<[NoZCZ]>;
+}
//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
@@ -2762,6 +2792,10 @@ defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
+defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
+ int_aarch64_neon_sqadd>;
+defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
+ int_aarch64_neon_sqsub>;
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
@@ -2775,6 +2809,55 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
+def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)),
+ (SMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)),
+ (SMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)),
+ (SMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)),
+ (SMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)),
+ (SMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)),
+ (SMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)),
+ (SMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)),
+ (SMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)),
+ (SMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)),
+ (SMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)),
+ (SMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)),
+ (SMAXv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)),
+ (UMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)),
+ (UMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)),
+ (UMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)),
+ (UMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)),
+ (UMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)),
+ (UMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)),
+ (UMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)),
+ (UMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)),
+ (UMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)),
+ (UMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)),
+ (UMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)),
+ (UMAXv4i32 V128:$Rn, V128:$Rm)>;
+
def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
(BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
@@ -2978,6 +3061,20 @@ defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
+let Predicates = [HasV8_1a] in {
+ defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
+ defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
+ def : Pat<(i32 (int_aarch64_neon_sqadd
+ (i32 FPR32:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(i32 (int_aarch64_neon_sqsub
+ (i32 FPR32:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+}
def : InstAlias<"cmls $dst, $src1, $src2",
(CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
@@ -3431,10 +3528,10 @@ defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">;
defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
-def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
-def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
+def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
+def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
(FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
@@ -3462,13 +3559,13 @@ def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------
-def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>;
-def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>;
-def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>;
-def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>;
-def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>;
-def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>;
-def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>;
+def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
+def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
+def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
+def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
+def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
+def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
+def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;
def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
@@ -3515,13 +3612,13 @@ def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(2 * N->getZExtValue(), MVT::i64);
+ return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(4 * N->getZExtValue(), MVT::i64);
+ return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(8 * N->getZExtValue(), MVT::i64);
+ return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
@@ -3724,36 +3821,24 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
-defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
-defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
-defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>;
// Floating point vector extractions are codegen'd as either a sequence of
-// subregister extractions, possibly fed by an INS if the lane number is
-// anything other than zero.
+// subregister extractions, or a MOV (aka CPY here, alias for DUP) if
+// the lane number is anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
(f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
(f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
+
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
- (f64 (EXTRACT_SUBREG
- (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexD:$idx),
- dsub))>;
+ (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
- (f32 (EXTRACT_SUBREG
- (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexS:$idx),
- ssub))>;
+ (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
- (f16 (EXTRACT_SUBREG
- (INSvi16lane (v8f16 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexH:$idx),
- hsub))>;
+ (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
@@ -3799,121 +3884,143 @@ defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
-multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> {
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
- def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)),
- (i32 (SMOVvi8to32
+// Patterns for across-vector intrinsics, that have a node equivalent, that
+// returns a vector (with only the low lane defined) instead of a scalar.
+// In effect, opNode is the same as (scalar_to_vector (IntNode)).
+multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
+ SDPatternOperator opNode> {
+// If a lane instruction caught the vector_extract around opNode, we can
+// directly match the latter to the instruction.
+def : Pat<(v8i8 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
+def : Pat<(v16i8 (opNode V128:$Rn)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
+def : Pat<(v4i16 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
+def : Pat<(v8i16 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
+def : Pat<(v4i32 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;
+
+
+// If none did, fallback to the explicit patterns, consuming the vector_extract.
+def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
+ (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (insert_subvector undef,
+ (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
+ ssub), ssub)>;
+
+}
+
+multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it as smov already
// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)),
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)),
(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
(i64 0)))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
(i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ (i64 0)))>;
}
-multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> {
+multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
- def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- ssub))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ ssub))>;
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
+ maski8_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
ssub))>;
-
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
ssub))>;
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
+ maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
ssub))>;
+}
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
+defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
-}
+defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
+def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
+ (SMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
+def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
+ (SMINPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
+def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
+ (UMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
+def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
+ (UMINPv2i32 V64:$Rn, V64:$Rn)>;
multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
@@ -3976,32 +4083,6 @@ def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
dsub))>;
}
-defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>;
-// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
-def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>;
-// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
-def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>;
-def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>;
-def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>;
-def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>;
-def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
@@ -4324,6 +4405,10 @@ defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
int_aarch64_neon_sqsub>;
+defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
+ int_aarch64_neon_sqadd>;
+defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
+ int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
@@ -5092,22 +5177,26 @@ def : Pat<(trap), (BRK 1)>;
// Natural vector casts (64 bit)
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
@@ -5122,22 +5211,26 @@ def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
// Natural vector casts (128 bit)
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8157981..186e71a 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -16,6 +16,7 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -63,16 +64,24 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
+ // \p SExtIdx[out] gives the index of the result of the load pair that
+ // must be extended. The value of SExtIdx assumes that the paired load
+ // produces the value in this order: (I, returned iterator), i.e.,
+ // -1 means no value has to be extended, 0 means I, and 1 means the
+ // returned iterator.
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward,
+ bool &MergeForward, int &SExtIdx,
unsigned Limit);
// Merge the two instructions indicated into a single pair-wise instruction.
// If MergeForward is true, erase the first instruction and fold its
// operation into the second. If false, the reverse. Return the instruction
// following the first instruction (which may change during processing).
+ // \p SExtIdx index of the result that must be extended for a paired load.
+ // -1 means none, 0 means I, and 1 means Paired.
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired, bool MergeForward);
+ MachineBasicBlock::iterator Paired, bool MergeForward,
+ int SExtIdx);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
@@ -135,6 +144,8 @@ static bool isUnscaledLdst(unsigned Opc) {
return true;
case AArch64::LDURXi:
return true;
+ case AArch64::LDURSWi:
+ return true;
}
}
@@ -173,6 +184,46 @@ int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
case AArch64::LDRXui:
case AArch64::LDURXi:
return 8;
+ case AArch64::LDRSWui:
+ case AArch64::LDURSWi:
+ return 4;
+ }
+}
+
+/// Map a load/store opcode onto the opcode that performs the same access
+/// without sign extension.
+///
+/// \param Opc              opcode to translate.
+/// \param IsValidLdStrOpc  optional out-parameter; when non-null it is set to
+///                         true if \p Opc is one of the opcodes listed below
+///                         and to false otherwise.
+/// \returns LDRWui for LDRSWui, LDURWi for LDURSWi, \p Opc itself for every
+///          other listed load/store, and UINT_MAX for anything else.
+static unsigned getMatchingNonSExtOpcode(unsigned Opc,
+                                         bool *IsValidLdStrOpc = nullptr) {
+  // Report success up front; only the default case below revokes it.
+  if (IsValidLdStrOpc)
+    *IsValidLdStrOpc = true;
+  switch (Opc) {
+  // Sign-extending word loads: use the plain word load with the same
+  // addressing form.
+  case AArch64::LDRSWui:
+    return AArch64::LDRWui;
+  case AArch64::LDURSWi:
+    return AArch64::LDURWi;
+  // These opcodes do not sign extend, so each one maps to itself.
+  case AArch64::STRSui:
+  case AArch64::STURSi:
+  case AArch64::STRDui:
+  case AArch64::STURDi:
+  case AArch64::STRQui:
+  case AArch64::STURQi:
+  case AArch64::STRWui:
+  case AArch64::STURWi:
+  case AArch64::STRXui:
+  case AArch64::STURXi:
+  case AArch64::LDRSui:
+  case AArch64::LDURSi:
+  case AArch64::LDRDui:
+  case AArch64::LDURDi:
+  case AArch64::LDRQui:
+  case AArch64::LDURQi:
+  case AArch64::LDRWui:
+  case AArch64::LDURWi:
+  case AArch64::LDRXui:
+  case AArch64::LDURXi:
+    return Opc;
+  // Anything else is not an opcode this mapping knows about.
+  default:
+    if (IsValidLdStrOpc)
+      *IsValidLdStrOpc = false;
+    return UINT_MAX;
   }
 }
@@ -210,6 +261,9 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::LDRXui:
case AArch64::LDURXi:
return AArch64::LDPXi;
+ case AArch64::LDRSWui:
+ case AArch64::LDURSWi:
+ return AArch64::LDPSWi;
}
}
@@ -237,6 +291,8 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
return AArch64::LDRWpre;
case AArch64::LDRXui:
return AArch64::LDRXpre;
+ case AArch64::LDRSWui:
+ return AArch64::LDRSWpre;
}
}
@@ -264,13 +320,15 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
return AArch64::LDRWpost;
case AArch64::LDRXui:
return AArch64::LDRXpost;
+ case AArch64::LDRSWui:
+ return AArch64::LDRSWpost;
}
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- bool MergeForward) {
+ bool MergeForward, int SExtIdx) {
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -280,11 +338,13 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
if (NextI == Paired)
++NextI;
- bool IsUnscaled = isUnscaledLdst(I->getOpcode());
+ unsigned Opc =
+ SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
+ bool IsUnscaled = isUnscaledLdst(Opc);
int OffsetStride =
IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
- unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
+ unsigned NewOpc = getMatchingPairOpcode(Opc);
// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
@@ -299,6 +359,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
Paired->getOperand(2).getImm() + OffsetStride) {
RtMI = Paired;
Rt2MI = I;
+ // Here we swapped the assumption made for SExtIdx.
+ // I.e., we turn ldp I, Paired into ldp Paired, I.
+ // Update the index accordingly.
+ if (SExtIdx != -1)
+ SExtIdx = (SExtIdx + 1) % 2;
} else {
RtMI = I;
Rt2MI = Paired;
@@ -325,8 +390,47 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
DEBUG(dbgs() << " ");
DEBUG(Paired->print(dbgs()));
DEBUG(dbgs() << " with instruction:\n ");
- DEBUG(((MachineInstr *)MIB)->print(dbgs()));
- DEBUG(dbgs() << "\n");
+
+ if (SExtIdx != -1) {
+ // Generate the sign extension for the proper result of the ldp.
+ // I.e., with X1, that would be:
+ // %W1<def> = KILL %W1, %X1<imp-def>
+ // %X1<def> = SBFMXri %X1<kill>, 0, 31
+ MachineOperand &DstMO = MIB->getOperand(SExtIdx);
+ // Right now, DstMO has the extended register, since it comes from an
+ // extended opcode.
+ unsigned DstRegX = DstMO.getReg();
+ // Get the W variant of that register.
+ unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
+ // Update the result of LDP to use the W instead of the X variant.
+ DstMO.setReg(DstRegW);
+ DEBUG(((MachineInstr *)MIB)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ // Make the machine verifier happy by providing a definition for
+ // the X register.
+ // Insert this definition right after the generated LDP, i.e., before
+ // InsertionPoint.
+ MachineInstrBuilder MIBKill =
+ BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(TargetOpcode::KILL), DstRegW)
+ .addReg(DstRegW)
+ .addReg(DstRegX, RegState::Define);
+ MIBKill->getOperand(2).setImplicit();
+ // Create the sign extension.
+ MachineInstrBuilder MIBSXTW =
+ BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(AArch64::SBFMXri), DstRegX)
+ .addReg(DstRegX)
+ .addImm(0)
+ .addImm(31);
+ (void)MIBSXTW;
+ DEBUG(dbgs() << " Extend operand:\n ");
+ DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ } else {
+ DEBUG(((MachineInstr *)MIB)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
// Erase the old instructions.
I->eraseFromParent();
@@ -380,17 +484,41 @@ static int alignTo(int Num, int PowOf2) {
return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
+/// Conservatively decide whether two instructions may access the same memory.
+///
+/// \param MIa  first instruction to compare.
+/// \param MIb  second instruction to compare.
+/// \param TII  target hook used to prove that two accesses are disjoint.
+/// \returns false when a conflict is impossible: neither instruction may
+///          store, at least one of them is not a load/store at all, or
+///          TII->areMemAccessesTriviallyDisjoint() reports the two accesses
+///          as disjoint. Returns true in every other case.
+static bool mayAlias(MachineInstr *MIa, MachineInstr *MIb,
+                     const AArch64InstrInfo *TII) {
+  // One of the instructions must modify memory.
+  if (!MIa->mayStore() && !MIb->mayStore())
+    return false;
+
+  // Both instructions must be memory operations, so reject the pair as soon
+  // as either side is not a load/store. Written with '&&' this test could
+  // never fire: the check above already guarantees that one side may store.
+  if (!MIa->mayLoadOrStore() || !MIb->mayLoadOrStore())
+    return false;
+
+  return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb);
+}
+
+/// Check \p MIa against every instruction recorded in \p MemInsns.
+///
+/// \param MIa       instruction under test.
+/// \param MemInsns  instructions to compare \p MIa against.
+/// \param TII       forwarded to the pairwise mayAlias() overload.
+/// \returns true as soon as the pairwise check reports a possible conflict
+///          with one entry, false if no entry does (including an empty list).
+static bool mayAlias(MachineInstr *MIa,
+                     SmallVectorImpl<MachineInstr *> &MemInsns,
+                     const AArch64InstrInfo *TII) {
+  for (MachineInstr *Candidate : MemInsns) {
+    if (mayAlias(MIa, Candidate, TII))
+      return true;
+  }
+  return false;
+}
+
/// findMatchingInsn - Scan the instructions looking for a load/store that can
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward, unsigned Limit) {
+ bool &MergeForward, int &SExtIdx,
+ unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
++MBBI;
- int Opc = FirstMI->getOpcode();
+ unsigned Opc = FirstMI->getOpcode();
bool MayLoad = FirstMI->mayLoad();
bool IsUnscaled = isUnscaledLdst(Opc);
unsigned Reg = FirstMI->getOperand(0).getReg();
@@ -414,6 +542,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
BitVector ModifiedRegs, UsedRegs;
ModifiedRegs.resize(TRI->getNumRegs());
UsedRegs.resize(TRI->getNumRegs());
+
+ // Remember any instructions that read/write memory between FirstMI and MI.
+ SmallVector<MachineInstr *, 4> MemInsns;
+
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
MachineInstr *MI = MBBI;
// Skip DBG_VALUE instructions. Otherwise debug info can affect the
@@ -424,7 +556,19 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Now that we know this is a real instruction, count it.
++Count;
- if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) {
+ bool CanMergeOpc = Opc == MI->getOpcode();
+ SExtIdx = -1;
+ if (!CanMergeOpc) {
+ bool IsValidLdStrOpc;
+ unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
+ if (!IsValidLdStrOpc)
+ continue;
+ // Opc will be the first instruction in the pair.
+ SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0;
+ CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
+ }
+
+ if (CanMergeOpc && MI->getOperand(2).isImm()) {
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
// These instructions all have scaled immediate operands, so we just
@@ -450,6 +594,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ if (MI->mayLoadOrStore())
+ MemInsns.push_back(MI);
continue;
}
// If the alignment requirements of the paired (scaled) instruction
@@ -458,6 +604,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
if (IsUnscaled && EnableAArch64UnscaledMemOp &&
(alignTo(MinOffset, OffsetStride) != MinOffset)) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ if (MI->mayLoadOrStore())
+ MemInsns.push_back(MI);
continue;
}
// If the destination register of the loads is the same register, bail
@@ -465,22 +613,29 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// registers the same is UNPREDICTABLE and will result in an exception.
if (MayLoad && Reg == MI->getOperand(0).getReg()) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ if (MI->mayLoadOrStore())
+ MemInsns.push_back(MI);
continue;
}
// If the Rt of the second instruction was not modified or used between
- // the two instructions, we can combine the second into the first.
+ // the two instructions and none of the instructions between the second
+ // and first alias with the second, we can combine the second into the
+ // first.
if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
- !UsedRegs[MI->getOperand(0).getReg()]) {
+ !UsedRegs[MI->getOperand(0).getReg()] &&
+ !mayAlias(MI, MemInsns, TII)) {
MergeForward = false;
return MBBI;
}
// Likewise, if the Rt of the first instruction is not modified or used
- // between the two instructions, we can combine the first into the
- // second.
+ // between the two instructions and none of the instructions between the
+ // first and the second alias with the first, we can combine the first
+ // into the second.
if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
- !UsedRegs[FirstMI->getOperand(0).getReg()]) {
+ !UsedRegs[FirstMI->getOperand(0).getReg()] &&
+ !mayAlias(FirstMI, MemInsns, TII)) {
MergeForward = true;
return MBBI;
}
@@ -489,21 +644,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
}
}
- // If the instruction wasn't a matching load or store, but does (or can)
- // modify memory, stop searching, as we don't have alias analysis or
- // anything like that to tell us whether the access is tromping on the
- // locations we care about. The big one we want to catch is calls.
- //
- // FIXME: Theoretically, we can do better than that for SP and FP based
- // references since we can effectively know where those are touching. It's
- // unclear if it's worth the extra code, though. Most paired instructions
- // will be sequential, perhaps with a few intervening non-memory related
- // instructions.
- if (MI->mayStore() || MI->isCall())
- return E;
- // Likewise, if we're matching a store instruction, we don't want to
- // move across a load, as it may be reading the same location.
- if (FirstMI->mayStore() && MI->mayLoad())
+ // The instruction wasn't a matching load or store. Stop searching if we
+ // encounter a call instruction that might modify memory.
+ if (MI->isCall())
return E;
// Update modified / uses register lists.
@@ -513,6 +656,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// return early.
if (ModifiedRegs[BaseReg])
return E;
+
+ // Update list of instructions that read/write memory.
+ if (MI->mayLoadOrStore())
+ MemInsns.push_back(MI);
}
return E;
}
@@ -780,6 +927,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
case AArch64::LDRQui:
case AArch64::LDRXui:
case AArch64::LDRWui:
+ case AArch64::LDRSWui:
// do the unscaled versions as well
case AArch64::STURSi:
case AArch64::STURDi:
@@ -790,7 +938,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
case AArch64::LDURDi:
case AArch64::LDURQi:
case AArch64::LDURWi:
- case AArch64::LDURXi: {
+ case AArch64::LDURXi:
+ case AArch64::LDURSWi: {
// If this is a volatile load/store, don't mess with it.
if (MI->hasOrderedMemoryRef()) {
++MBBI;
@@ -809,13 +958,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
}
// Look ahead up to ScanLimit instructions for a pairable instruction.
bool MergeForward = false;
+ int SExtIdx = -1;
MachineBasicBlock::iterator Paired =
- findMatchingInsn(MBBI, MergeForward, ScanLimit);
+ findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit);
if (Paired != E) {
// Merge the loads into a pair. Keeping the iterator straight is a
// pain, so we let the merge routine tell us what the next instruction
// is after it's done mucking about.
- MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
+ MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx);
Modified = true;
++NumPairCreated;
@@ -835,7 +985,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
MachineInstr *MI = MBBI;
// Do update merging. It's simpler to keep this separate from the above
// switch, though not strictly necessary.
- int Opc = MI->getOpcode();
+ unsigned Opc = MI->getOpcode();
switch (Opc) {
default:
// Just move on to the next instruction.
@@ -931,10 +1081,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- const TargetMachine &TM = Fn.getTarget();
- TII = static_cast<const AArch64InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ TRI = Fn.getSubtarget().getRegisterInfo();
bool Modified = false;
for (auto &MBB : Fn)
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index b829341..72edbf1 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -73,7 +73,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO,
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(
Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
- return MCOperand::CreateExpr(Expr);
+ return MCOperand::createExpr(Expr);
}
MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
@@ -148,7 +148,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
RefKind = static_cast<AArch64MCExpr::VariantKind>(RefFlags);
Expr = AArch64MCExpr::Create(Expr, RefKind, Ctx);
- return MCOperand::CreateExpr(Expr);
+ return MCOperand::createExpr(Expr);
}
MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
@@ -169,16 +169,16 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO,
// Ignore all implicit register operands.
if (MO.isImplicit())
return false;
- MCOp = MCOperand::CreateReg(MO.getReg());
+ MCOp = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_RegisterMask:
// Regmasks are like implicit defs.
return false;
case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
+ MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(
+ MCOp = MCOperand::createExpr(
MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx));
break;
case MachineOperand::MO_GlobalAddress:
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index f942c4e..5394875 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -235,7 +235,7 @@ bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
costs[i + 1][j + 1] = sameParityMax + 1.0;
}
}
- G.setEdgeCosts(edge, std::move(costs));
+ G.updateEdgeCosts(edge, std::move(costs));
return true;
}
@@ -312,14 +312,14 @@ void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
costs[i + 1][j + 1] = sameParityMax + 1.0;
}
}
- G.setEdgeCosts(edge, std::move(costs));
+ G.updateEdgeCosts(edge, std::move(costs));
}
}
}
static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg,
const MachineInstr &MI) {
- LiveInterval LI = LIs.getInterval(reg);
+ const LiveInterval &LI = LIs.getInterval(reg);
SlotIndex SI = LIs.getInstructionIndex(&MI);
return LI.expiredAt(SI);
}
@@ -328,7 +328,7 @@ void A57ChainingConstraint::apply(PBQPRAGraph &G) {
const MachineFunction &MF = G.getMetadata().MF;
LiveIntervals &LIs = G.getMetadata().LIS;
- TRI = MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
DEBUG(MF.dump());
for (const auto &MBB: MF) {
diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp
index 97b0f0e..e1b93bf 100644
--- a/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -22,7 +22,7 @@
#include "AArch64.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
@@ -31,12 +31,14 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -112,44 +114,42 @@ private:
AU.addPreserved<DominatorTreeWrapperPass>();
}
- /// Type to store a list of User.
- typedef SmallVector<Value::user_iterator, 4> Users;
+ /// Type to store a list of Uses.
+ typedef SmallVector<Use *, 4> Uses;
/// Map an insertion point to all the uses it dominates.
- typedef DenseMap<Instruction *, Users> InsertionPoints;
+ typedef DenseMap<Instruction *, Uses> InsertionPoints;
/// Map a function to the required insertion point of load for a
/// global variable.
typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc;
/// Find the closest point that dominates the given Use.
- Instruction *findInsertionPoint(Value::user_iterator &Use);
+ Instruction *findInsertionPoint(Use &Use);
/// Check if the given insertion point is dominated by an existing
/// insertion point.
/// If true, the given use is added to the list of dominated uses for
/// the related existing point.
/// \param NewPt the insertion point to be checked
- /// \param UseIt the use to be added into the list of dominated uses
+ /// \param Use the use to be added into the list of dominated uses
/// \param InsertPts existing insertion points
/// \pre NewPt and all instruction in InsertPts belong to the same function
/// \return true if one of the insertion point in InsertPts dominates NewPt,
/// false otherwise
- bool isDominated(Instruction *NewPt, Value::user_iterator &UseIt,
- InsertionPoints &InsertPts);
+ bool isDominated(Instruction *NewPt, Use &Use, InsertionPoints &InsertPts);
/// Check if the given insertion point can be merged with an existing
/// insertion point in a common dominator.
/// If true, the given use is added to the list of the created insertion
/// point.
/// \param NewPt the insertion point to be checked
- /// \param UseIt the use to be added into the list of dominated uses
+ /// \param Use the use to be added into the list of dominated uses
/// \param InsertPts existing insertion points
/// \pre NewPt and all instruction in InsertPts belong to the same function
/// \pre isDominated returns false for the exact same parameters.
/// \return true if it exists an insertion point in InsertPts that could
/// have been merged with NewPt in a common dominator,
/// false otherwise
- bool tryAndMerge(Instruction *NewPt, Value::user_iterator &UseIt,
- InsertionPoints &InsertPts);
+ bool tryAndMerge(Instruction *NewPt, Use &Use, InsertionPoints &InsertPts);
/// Compute the minimal insertion points to dominates all the interesting
/// uses of value.
@@ -182,21 +182,21 @@ private:
bool promoteConstant(Constant *Cst);
/// Transfer the list of dominated uses of IPI to NewPt in InsertPts.
- /// Append UseIt to this list and delete the entry of IPI in InsertPts.
- static void appendAndTransferDominatedUses(Instruction *NewPt,
- Value::user_iterator &UseIt,
+ /// Append Use to this list and delete the entry of IPI in InsertPts.
+ static void appendAndTransferDominatedUses(Instruction *NewPt, Use &Use,
InsertionPoints::iterator &IPI,
InsertionPoints &InsertPts) {
// Record the dominated use.
- IPI->second.push_back(UseIt);
+ IPI->second.push_back(&Use);
// Transfer the dominated uses of IPI to NewPt
// Inserting into the DenseMap may invalidate existing iterator.
- // Keep a copy of the key to find the iterator to erase.
+ // Keep a copy of the key to find the iterator to erase. Keep a copy of the
+ // value so that we don't have to dereference IPI->second.
Instruction *OldInstr = IPI->first;
- InsertPts[NewPt] = std::move(IPI->second);
+ Uses OldUses = std::move(IPI->second);
+ InsertPts[NewPt] = std::move(OldUses);
// Erase IPI.
- IPI = InsertPts.find(OldInstr);
- InsertPts.erase(IPI);
+ InsertPts.erase(OldInstr);
}
};
} // end anonymous namespace
@@ -328,23 +328,18 @@ static bool shouldConvert(const Constant *Cst) {
return isConstantUsingVectorTy(Cst->getType());
}
-Instruction *
-AArch64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) {
+Instruction *AArch64PromoteConstant::findInsertionPoint(Use &Use) {
+ Instruction *User = cast<Instruction>(Use.getUser());
+
// If this user is a phi, the insertion point is in the related
// incoming basic block.
- PHINode *PhiInst = dyn_cast<PHINode>(*Use);
- Instruction *InsertionPoint;
- if (PhiInst)
- InsertionPoint =
- PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator();
- else
- InsertionPoint = dyn_cast<Instruction>(*Use);
- assert(InsertionPoint && "User is not an instruction!");
- return InsertionPoint;
+ if (PHINode *PhiInst = dyn_cast<PHINode>(User))
+ return PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator();
+
+ return User;
}
-bool AArch64PromoteConstant::isDominated(Instruction *NewPt,
- Value::user_iterator &UseIt,
+bool AArch64PromoteConstant::isDominated(Instruction *NewPt, Use &Use,
InsertionPoints &InsertPts) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
@@ -363,15 +358,14 @@ bool AArch64PromoteConstant::isDominated(Instruction *NewPt,
DEBUG(dbgs() << "Insertion point dominated by:\n");
DEBUG(IPI.first->print(dbgs()));
DEBUG(dbgs() << '\n');
- IPI.second.push_back(UseIt);
+ IPI.second.push_back(&Use);
return true;
}
}
return false;
}
-bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
- Value::user_iterator &UseIt,
+bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, Use &Use,
InsertionPoints &InsertPts) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
*NewPt->getParent()->getParent()).getDomTree();
@@ -391,7 +385,7 @@ bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
DEBUG(dbgs() << "Merge insertion point with:\n");
DEBUG(IPI->first->print(dbgs()));
DEBUG(dbgs() << "\nat considered insertion point.\n");
- appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
+ appendAndTransferDominatedUses(NewPt, Use, IPI, InsertPts);
return true;
}
@@ -415,7 +409,7 @@ bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
DEBUG(dbgs() << '\n');
DEBUG(NewPt->print(dbgs()));
DEBUG(dbgs() << '\n');
- appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
+ appendAndTransferDominatedUses(NewPt, Use, IPI, InsertPts);
return true;
}
return false;
@@ -424,22 +418,22 @@ bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
void AArch64PromoteConstant::computeInsertionPoints(
Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) {
DEBUG(dbgs() << "** Compute insertion points **\n");
- for (Value::user_iterator UseIt = Val->user_begin(),
- EndUseIt = Val->user_end();
- UseIt != EndUseIt; ++UseIt) {
+ for (Use &Use : Val->uses()) {
+ Instruction *User = dyn_cast<Instruction>(Use.getUser());
+
// If the user is not an Instruction, we cannot modify it.
- if (!isa<Instruction>(*UseIt))
+ if (!User)
continue;
// Filter out uses that should not be converted.
- if (!shouldConvertUse(Val, cast<Instruction>(*UseIt), UseIt.getOperandNo()))
+ if (!shouldConvertUse(Val, User, Use.getOperandNo()))
continue;
- DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n");
- DEBUG((*UseIt)->print(dbgs()));
+ DEBUG(dbgs() << "Considered use, opidx " << Use.getOperandNo() << ":\n");
+ DEBUG(User->print(dbgs()));
DEBUG(dbgs() << '\n');
- Instruction *InsertionPoint = findInsertionPoint(UseIt);
+ Instruction *InsertionPoint = findInsertionPoint(Use);
DEBUG(dbgs() << "Considered insertion point:\n");
DEBUG(InsertionPoint->print(dbgs()));
@@ -449,17 +443,17 @@ void AArch64PromoteConstant::computeInsertionPoints(
// by another one.
InsertionPoints &InsertPts =
InsPtsPerFunc[InsertionPoint->getParent()->getParent()];
- if (isDominated(InsertionPoint, UseIt, InsertPts))
+ if (isDominated(InsertionPoint, Use, InsertPts))
continue;
// This insertion point is useful, check if we can merge some insertion
// point in a common dominator or if NewPt dominates an existing one.
- if (tryAndMerge(InsertionPoint, UseIt, InsertPts))
+ if (tryAndMerge(InsertionPoint, Use, InsertPts))
continue;
DEBUG(dbgs() << "Keep considered insertion point\n");
// It is definitely useful on its own
- InsertPts[InsertionPoint].push_back(UseIt);
+ InsertPts[InsertionPoint].push_back(&Use);
}
}
@@ -470,41 +464,32 @@ bool AArch64PromoteConstant::insertDefinitions(
bool HasChanged = false;
// Traverse all insertion points in all the functions.
- for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(),
- EndIt = InsPtsPerFunc.end();
- FctToInstPtsIt != EndIt; ++FctToInstPtsIt) {
- InsertionPoints &InsertPts = FctToInstPtsIt->second;
+ for (const auto &FctToInstPtsIt : InsPtsPerFunc) {
+ const InsertionPoints &InsertPts = FctToInstPtsIt.second;
// Do more checking for debug purposes.
#ifndef NDEBUG
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
- *FctToInstPtsIt->first).getDomTree();
+ *FctToInstPtsIt.first).getDomTree();
#endif
- GlobalVariable *PromotedGV;
assert(!InsertPts.empty() && "Empty uses does not need a definition");
- Module *M = FctToInstPtsIt->first->getParent();
- DenseMap<Module *, GlobalVariable *>::iterator MapIt =
- ModuleToMergedGV.find(M);
- if (MapIt == ModuleToMergedGV.end()) {
+ Module *M = FctToInstPtsIt.first->getParent();
+ GlobalVariable *&PromotedGV = ModuleToMergedGV[M];
+ if (!PromotedGV) {
PromotedGV = new GlobalVariable(
*M, Cst->getType(), true, GlobalValue::InternalLinkage, nullptr,
"_PromotedConst", nullptr, GlobalVariable::NotThreadLocal);
PromotedGV->setInitializer(Cst);
- ModuleToMergedGV[M] = PromotedGV;
DEBUG(dbgs() << "Global replacement: ");
DEBUG(PromotedGV->print(dbgs()));
DEBUG(dbgs() << '\n');
++NumPromoted;
HasChanged = true;
- } else {
- PromotedGV = MapIt->second;
}
- for (InsertionPoints::iterator IPI = InsertPts.begin(),
- EndIPI = InsertPts.end();
- IPI != EndIPI; ++IPI) {
+ for (const auto &IPI : InsertPts) {
// Create the load of the global variable.
- IRBuilder<> Builder(IPI->first->getParent(), IPI->first);
+ IRBuilder<> Builder(IPI.first->getParent(), IPI.first);
LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
DEBUG(dbgs() << "**********\n");
DEBUG(dbgs() << "New def: ");
@@ -512,18 +497,15 @@ bool AArch64PromoteConstant::insertDefinitions(
DEBUG(dbgs() << '\n');
// Update the dominated uses.
- Users &DominatedUsers = IPI->second;
- for (Value::user_iterator Use : DominatedUsers) {
+ for (Use *Use : IPI.second) {
#ifndef NDEBUG
- assert((DT.dominates(LoadedCst, cast<Instruction>(*Use)) ||
- (isa<PHINode>(*Use) &&
- DT.dominates(LoadedCst, findInsertionPoint(Use)))) &&
+ assert(DT.dominates(LoadedCst, findInsertionPoint(*Use)) &&
"Inserted definition does not dominate all its uses!");
#endif
- DEBUG(dbgs() << "Use to update " << Use.getOperandNo() << ":");
- DEBUG(Use->print(dbgs()));
+ DEBUG(dbgs() << "Use to update " << Use->getOperandNo() << ":");
+ DEBUG(Use->getUser()->print(dbgs()));
DEBUG(dbgs() << '\n');
- Use->setOperand(Use.getOperandNo(), LoadedCst);
+ Use->set(LoadedCst);
++NumPromotedUses;
}
}
@@ -556,22 +538,19 @@ bool AArch64PromoteConstant::runOnFunction(Function &F) {
// global variable. Create as few loads of this variable as possible and
// update the uses accordingly.
bool LocalChange = false;
- SmallSet<Constant *, 8> AlreadyChecked;
-
- for (auto &MBB : F) {
- for (auto &MI : MBB) {
- // Traverse the operand, looking for constant vectors. Replace them by a
- // load of a global variable of constant vector type.
- for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands();
- OpIdx != EndOpIdx; ++OpIdx) {
- Constant *Cst = dyn_cast<Constant>(MI.getOperand(OpIdx));
- // There is no point in promoting global values as they are already
- // global. Do not promote constant expressions either, as they may
- // require some code expansion.
- if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
- AlreadyChecked.insert(Cst).second)
- LocalChange |= promoteConstant(Cst);
- }
+ SmallPtrSet<Constant *, 8> AlreadyChecked;
+
+ for (Instruction &I : inst_range(&F)) {
+ // Traverse the operand, looking for constant vectors. Replace them by a
+ // load of a global variable of constant vector type.
+ for (Value *Op : I.operand_values()) {
+ Constant *Cst = dyn_cast<Constant>(Op);
+ // There is no point in promoting global values as they are already
+ // global. Do not promote constant expressions either, as they may
+ // require some code expansion.
+ if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
+ AlreadyChecked.insert(Cst).second)
+ LocalChange |= promoteConstant(Cst);
}
}
return LocalChange;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 206cdbb..1836682 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -37,9 +38,8 @@ static cl::opt<bool>
ReserveX18("aarch64-reserve-x18", cl::Hidden,
cl::desc("Reserve X18, making it unavailable as GPR"));
-AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
- const AArch64Subtarget *sti)
- : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {}
+AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
+ : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {}
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -55,7 +55,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
const uint32_t *
-AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
if (CC == CallingConv::GHC)
// This is academic because all GHC calls are (supposed to be) tail calls
return CSR_AArch64_NoRegs_RegMask;
@@ -66,15 +67,16 @@ AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
}
const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
- if (STI->isTargetDarwin())
+ if (TT.isOSDarwin())
return CSR_AArch64_TLS_Darwin_RegMask;
- assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
+ assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS");
return CSR_AArch64_TLS_ELF_RegMask;
}
const uint32_t *
-AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
+AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i64 argument (which must also be the register used to return a
@@ -97,12 +99,12 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AArch64::WSP);
Reserved.set(AArch64::WZR);
- if (TFI->hasFP(MF) || STI->isTargetDarwin()) {
+ if (TFI->hasFP(MF) || TT.isOSDarwin()) {
Reserved.set(AArch64::FP);
Reserved.set(AArch64::W29);
}
- if (STI->isTargetDarwin() || ReserveX18) {
+ if (TT.isOSDarwin() || ReserveX18) {
Reserved.set(AArch64::X18); // Platform register
Reserved.set(AArch64::W18);
}
@@ -129,10 +131,10 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
return true;
case AArch64::X18:
case AArch64::W18:
- return STI->isTargetDarwin() || ReserveX18;
+ return TT.isOSDarwin() || ReserveX18;
case AArch64::FP:
case AArch64::W29:
- return TFI->hasFP(MF) || STI->isTargetDarwin();
+ return TFI->hasFP(MF) || TT.isOSDarwin();
case AArch64::W19:
case AArch64::X19:
return hasBasePointer(MF);
@@ -163,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// large enough that referencing from the FP won't result in things being
// in range relatively often, we can use a base pointer to allow access
// from the other direction like the SP normally works.
+ // Furthermore, if variable sized objects are present and the stack also
+ // needs to be dynamically re-aligned, the base pointer is the only
+ // reliable way to reference the locals.
if (MFI->hasVarSizedObjects()) {
+ if (needsStackRealignment(MF))
+ return true;
// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
@@ -179,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return false;
}
+bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const {
+
+ if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ return false;
+
+ return true;
+}
+
+// FIXME: share this with other backends with identical implementation?
+bool
+AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget()
+ .getSubtargetImpl(*MF.getFunction())
+ ->getFrameLowering()
+ ->getStackAlignment();
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
@@ -269,7 +301,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// The FP is only available if there is no dynamic realignment. We
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
- if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset))
+ if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, AArch64::FP, FPOffset))
return false;
// If we can reference via the stack pointer or base pointer, try that.
@@ -277,7 +309,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
- if (isFrameOffsetLegal(MI, Offset))
+ if (isFrameOffsetLegal(MI, AArch64::SP, Offset))
return false;
// The offset likely isn't legal; we want to allocate a virtual base register.
@@ -285,6 +317,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
}
bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ unsigned BaseReg,
int64_t Offset) const {
assert(Offset <= INT_MAX && "Offset too big to fit in int.");
assert(MI && "Unable to get the legal offset for nil instruction.");
@@ -302,10 +335,11 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
DebugLoc DL; // Defaults to "unknown"
if (Ins != MBB->end())
DL = Ins->getDebugLoc();
-
+ const MachineFunction &MF = *MBB->getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- const MachineFunction &MF = *MBB->getParent();
MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
@@ -324,6 +358,9 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const AArch64InstrInfo *TII =
+ MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII);
assert(Done && "Unable to resolve frame index!");
(void)Done;
@@ -337,6 +374,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>(
MF.getSubtarget().getFrameLowering());
@@ -389,10 +428,10 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::GPR64RegClassID:
case AArch64::GPR32commonRegClassID:
case AArch64::GPR64commonRegClassID:
- return 32 - 1 // XZR/SP
- - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
- - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register
- - hasBasePointer(MF); // X19
+ return 32 - 1 // XZR/SP
+ - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
+ - (TT.isOSDarwin() || ReserveX18) // X18 reserved as platform register
+ - hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
case AArch64::FPR32RegClassID:
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
index 51a5034..8c379d9 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -19,26 +19,24 @@
namespace llvm {
-class AArch64InstrInfo;
-class AArch64Subtarget;
class MachineFunction;
class RegScavenger;
class TargetRegisterClass;
+class Triple;
struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
private:
- const AArch64InstrInfo *TII;
- const AArch64Subtarget *STI;
+ const Triple &TT;
public:
- AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti);
+ AArch64RegisterInfo(const Triple &TT);
bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
unsigned getCSRFirstUseCost() const override {
// The cost will be compared against BlockFrequency where entry has the
@@ -59,7 +57,8 @@ public:
///
/// Should return NULL in the case that the calling convention does not have
/// this property
- const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
+ const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
const TargetRegisterClass *
@@ -73,7 +72,7 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg,
int FrameIdx,
@@ -94,6 +93,9 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
+ // Base pointer (stack realignment) support.
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
index 3ec4157..ca4457a 100644
--- a/lib/Target/AArch64/AArch64SchedA57.td
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -60,7 +60,12 @@ include "AArch64SchedA57WriteRes.td"
// Cortex-A57. The Cortex-A57 types are directly associated with resources, so
// defining the aliases precludes the need for mapping them using WriteRes. The
// aliases are sufficient for creating a coarse, working model. As the model
-// evolves, InstRWs will be used to override these SchedAliases.
+// evolves, InstRWs will be used to override some of these SchedAliases.
+//
+// WARNING: Using SchedAliases is convenient and works well for latency and
+// resource lookup for instructions. However, this creates an entry in
+// AArch64WriteLatencyTable with a WriteResourceID of 0, breaking
+// any SchedReadAdvance since the lookup will fail.
def : SchedAlias<WriteImm, A57Write_1cyc_1I>;
def : SchedAlias<WriteI, A57Write_1cyc_1I>;
@@ -70,8 +75,8 @@ def : SchedAlias<WriteExtr, A57Write_1cyc_1I>;
def : SchedAlias<WriteIS, A57Write_1cyc_1I>;
def : SchedAlias<WriteID32, A57Write_19cyc_1M>;
def : SchedAlias<WriteID64, A57Write_35cyc_1M>;
-def : SchedAlias<WriteIM32, A57Write_3cyc_1M>;
-def : SchedAlias<WriteIM64, A57Write_5cyc_1M>;
+def : WriteRes<WriteIM32, [A57UnitM]> { let Latency = 3; }
+def : WriteRes<WriteIM64, [A57UnitM]> { let Latency = 5; }
def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>;
def : SchedAlias<WriteLD, A57Write_4cyc_1L>;
@@ -127,6 +132,15 @@ def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>;
def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>;
+// Shifted Register with Shift == 0
+// ----------------------------------------------------------------------------
+
+def A57WriteISReg : SchedWriteVariant<[
+ SchedVar<RegShiftedPred, [WriteISReg]>,
+ SchedVar<NoSchedPred, [WriteI]>]>;
+def : InstRW<[A57WriteISReg], (instregex ".*rs$")>;
+
+
// Divide and Multiply Instructions
// -----------------------------------------------------------------------------
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 0cfd582..b9c5399 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -28,15 +28,14 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
+ const AArch64Subtarget &STI =
+ DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
const char *bzeroEntry =
- (V && V->isNullValue())
- ? DAG.getTarget().getSubtarget<AArch64Subtarget>().getBZeroEntry()
- : nullptr;
+ (V && V->isNullValue()) ? STI.getBZeroEntry() : nullptr;
// For small size (< 256), it is not beneficial to use bzero
// instead of memset.
if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
- const AArch64TargetLowering &TLI =
- *DAG.getTarget().getSubtarget<AArch64Subtarget>().getTargetLowering();
+ const AArch64TargetLowering &TLI = *STI.getTargetLowering();
EVT IntPtr = TLI.getPointerTy();
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index 0c36e8f..85b44a2 100644
--- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -30,7 +30,6 @@ class AArch64StorePairSuppress : public MachineFunctionPass {
const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
- MachineFunction *MF;
TargetSchedModel SchedModel;
MachineTraceMetrics *Traces;
MachineTraceMetrics::Ensemble *MinInstr;
@@ -115,20 +114,16 @@ bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
}
}
-bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
- MF = &mf;
- TII =
- static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
- TRI = MF->getSubtarget().getRegisterInfo();
- MRI = &MF->getRegInfo();
- const TargetSubtargetInfo &ST =
- MF->getTarget().getSubtarget<TargetSubtargetInfo>();
+bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
+ TRI = ST.getRegisterInfo();
+ MRI = &MF.getRegInfo();
SchedModel.init(ST.getSchedModel(), &ST, TII);
-
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n');
+ DEBUG(dbgs() << "*** " << getPassName() << ": " << MF.getName() << '\n');
if (!SchedModel.hasInstrSchedModel()) {
DEBUG(dbgs() << " Skipping pass: no machine model present.\n");
@@ -139,7 +134,7 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
// precisely determine whether a store pair can be formed. But we do want to
// filter out most situations where we can't form store pairs to avoid
// computing trace metrics in those cases.
- for (auto &MBB : *MF) {
+ for (auto &MBB : MF) {
bool SuppressSTP = false;
unsigned PrevBaseReg = 0;
for (auto &MI : MBB) {
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 47b5d54..0b97af8 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -47,18 +47,12 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
+ HasV8_1aOps(false),
HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
- HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU),
- TargetTriple(TT),
- // This nested ternary is horrible, but DL needs to be properly
- // initialized
- // before TLInfo is constructed.
- DL(isTargetMachO()
- ? "e-m:o-i64:64-i128:128-n32:64-S128"
- : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
- : "E-m:e-i64:64-i128:128-n32:64-S128")),
- FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)),
- TSInfo(&DL), TLInfo(TM) {}
+ HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
+ IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(FS)),
+ TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index e2740f1..5454b20 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -37,6 +37,8 @@ protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily;
+ bool HasV8_1aOps;
+
bool HasFPARMv8;
bool HasNEON;
bool HasCrypto;
@@ -48,13 +50,14 @@ protected:
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing;
+ bool IsLittle;
+
/// CPUString - String name of used CPU.
std::string CPUString;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
- const DataLayout DL;
AArch64FrameLowering FrameLowering;
AArch64InstrInfo InstrInfo;
AArch64SelectionDAGInfo TSInfo;
@@ -82,15 +85,17 @@ public:
return &TLInfo;
}
const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
const AArch64RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
+ const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostMachineScheduler() const override {
return isCortexA53() || isCortexA57();
}
+ bool hasV8_1aOps() const { return HasV8_1aOps; }
+
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
@@ -100,7 +105,7 @@ public:
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
- bool isLittleEndian() const { return DL.isLittleEndian(); }
+ bool isLittleEndian() const { return IsLittle; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetIOS() const { return TargetTriple.isiOS(); }
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index d4f19d2..a9059ab 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -13,10 +13,11 @@
#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
+#include "AArch64TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/IR/Function.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
@@ -84,7 +85,12 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
static cl::opt<bool>
EnableGEPOpt("aarch64-gep-opt", cl::Hidden,
cl::desc("Enable optimizations on complex GEPs"),
- cl::init(true));
+ cl::init(false));
+
+// FIXME: Unify control over GlobalMerge.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalMerge("aarch64-global-merge", cl::Hidden,
+ cl::desc("Enable the global merge pass"));
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
@@ -103,6 +109,16 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<AArch64_ELFTargetObjectFile>();
}
+// Helper function to build a DataLayout string
+static std::string computeDataLayout(StringRef TT, bool LittleEndian) {
+ Triple Triple(TT);
+ if (Triple.isOSBinFormatMachO())
+ return "e-m:o-i64:64-i128:128-n32:64-S128";
+ if (LittleEndian)
+ return "e-m:e-i64:64-i128:128-n32:64-S128";
+ return "E-m:e-i64:64-i128:128-n32:64-S128";
+}
+
/// TargetMachine ctor - Create an AArch64 architecture model.
///
AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
@@ -111,9 +127,12 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool LittleEndian)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ // The DataLayout string is built by computeDataLayout() so that it is
+ // available when the LLVMTargetMachine base class is constructed.
+ : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
- Subtarget(TT, CPU, FS, *this, LittleEndian), isLittle(LittleEndian) {
+ isLittle(LittleEndian) {
initAsmInfo();
}
@@ -121,11 +140,8 @@ AArch64TargetMachine::~AArch64TargetMachine() {}
const AArch64Subtarget *
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
- AttributeSet FnAttrs = F.getAttributes();
- Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
- Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
? CPUAttr.getValueAsString().str()
@@ -188,12 +204,10 @@ public:
};
} // namespace
-void AArch64TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our AArch64 pass. This
- // allows the AArch64 pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createAArch64TargetTransformInfoPass(this));
+TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(AArch64TTIImpl(this, F));
+ });
}
TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
@@ -233,8 +247,13 @@ bool AArch64PassConfig::addPreISel() {
// get a chance to be merged
if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
addPass(createAArch64PromoteConstantPass());
- if (TM->getOptLevel() != CodeGenOpt::None)
- addPass(createGlobalMergePass(TM));
+ // FIXME: On AArch64, this depends on the type.
+ // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(),
+ // and the offset has to be a multiple of the related size in bytes.
+ if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
+ EnableGlobalMerge == cl::BOU_UNSET) ||
+ EnableGlobalMerge == cl::BOU_TRUE)
+ addPass(createGlobalMergePass(TM, 4095));
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createAArch64AddressTypePromotionPass());
@@ -246,7 +265,7 @@ bool AArch64PassConfig::addInstSelector() {
// For ELF, clean up any local-dynamic TLS accesses (i.e. combine as many
// references to _TLS_MODULE_BASE_ as possible).
- if (TM->getSubtarget<AArch64Subtarget>().isTargetELF() &&
+ if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
getOptLevel() != CodeGenOpt::None)
addPass(createAArch64CleanupLocalDynamicTLSPass());
@@ -281,10 +300,7 @@ void AArch64PassConfig::addPostRegAlloc() {
// Change dead register definitions to refer to the zero register.
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
- if (TM->getOptLevel() != CodeGenOpt::None &&
- (TM->getSubtarget<AArch64Subtarget>().isCortexA53() ||
- TM->getSubtarget<AArch64Subtarget>().isCortexA57()) &&
- usingDefaultRegAlloc())
+ if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());
}
@@ -304,6 +320,6 @@ void AArch64PassConfig::addPreEmitPass() {
// range of their destination.
addPass(createAArch64BranchRelaxation());
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
- TM->getSubtarget<AArch64Subtarget>().isTargetMachO())
+ Triple(TM->getTargetTriple()).isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
}
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 75c65c5..ec34fad 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -24,7 +24,6 @@ namespace llvm {
class AArch64TargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- AArch64Subtarget Subtarget;
mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap;
public:
@@ -34,17 +33,13 @@ public:
CodeGenOpt::Level OL, bool IsLittleEndian);
~AArch64TargetMachine() override;
-
- const AArch64Subtarget *getSubtargetImpl() const override {
- return &Subtarget;
- }
const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- /// \brief Register AArch64 analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
+ /// \brief Get the TargetIRAnalysis for this target.
+ TargetIRAnalysis getTargetIRAnalysis() override;
TargetLoweringObjectFile* getObjFileLowering() const override {
return TLOF.get();
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 4069038..299b4a5 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
using namespace dwarf;
@@ -23,6 +24,11 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
InitializeELF(TM.Options.UseInitArray);
}
+AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile()
+ : TargetLoweringObjectFileMachO() {
+ SupportGOTPCRelWithOffset = false;
+}
+
const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
const TargetMachine &TM, MachineModuleInfo *MMI,
@@ -35,7 +41,7 @@ const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference(
const MCSymbol *Sym = TM.getSymbol(GV, Mang);
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
- MCSymbol *PCSym = getContext().CreateTempSymbol();
+ MCSymbol *PCSym = getContext().createTempSymbol();
Streamer.EmitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
return MCBinaryExpr::CreateSub(Res, PC, getContext());
@@ -50,3 +56,18 @@ MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol(
MachineModuleInfo *MMI) const {
return TM.getSymbol(GV, Mang);
}
+
+const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ assert((Offset+MV.getConstant() == 0) &&
+ "Arch64 does not support GOT PC rel with extra offset");
+ // On ARM64 Darwin, we can reference symbols with foo@GOT-., which
+ // is an indirect pc-relative reference.
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
+ MCSymbol *PCSym = getContext().createTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
+ return MCBinaryExpr::CreateSub(Res, PC, getContext());
+}
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index 2e595f9..d41f445 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -24,6 +24,8 @@ class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
/// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin.
class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
+ AArch64_MachoTargetObjectFile();
+
const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
unsigned Encoding, Mangler &Mang,
const TargetMachine &TM,
@@ -33,6 +35,11 @@ public:
MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
+
+ const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
};
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b1a2914..ed27cf8 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===//
+//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,18 +6,12 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements a TargetTransformInfo analysis pass specific to the
-/// AArch64 target machine. It uses the target's detailed information to provide
-/// more precise answers to certain TTI queries, while letting the target
-/// independent and default TTI implementations handle the rest.
-///
-//===----------------------------------------------------------------------===//
-#include "AArch64.h"
-#include "AArch64TargetMachine.h"
+#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -26,130 +20,10 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64tti"
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeAArch64TTIPass(PassRegistry &);
-}
-
-namespace {
-
-class AArch64TTI final : public ImmutablePass, public TargetTransformInfo {
- const AArch64TargetMachine *TM;
- const AArch64Subtarget *ST;
- const AArch64TargetLowering *TLI;
-
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
-public:
- AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- AArch64TTI(const AArch64TargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializeAArch64TTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override { pushTTIStack(this); }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo *)this;
- return this;
- }
-
- /// \name Scalar TTI Implementations
- /// @{
- unsigned getIntImmCost(int64_t Val) const;
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const override;
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) const override;
- PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
-
- /// @}
-
- /// \name Vector TTI Implementations
- /// @{
-
- unsigned getNumberOfRegisters(bool Vector) const override {
- if (Vector) {
- if (ST->hasNEON())
- return 32;
- return 0;
- }
- return 31;
- }
-
- unsigned getRegisterBitWidth(bool Vector) const override {
- if (Vector) {
- if (ST->hasNEON())
- return 128;
- return 0;
- }
- return 64;
- }
-
- unsigned getMaxInterleaveFactor() const override;
-
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
- override;
-
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
- override;
-
- unsigned getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
- OperandValueKind Opd2Info = OK_AnyValue,
- OperandValueProperties Opd1PropInfo = OP_None,
- OperandValueProperties Opd2PropInfo = OP_None) const override;
-
- unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
-
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const
- override;
-
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override;
-
- unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override;
-
- void getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const override;
-
-
- /// @}
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti",
- "AArch64 Target Transform Info", true, true, false)
-char AArch64TTI::ID = 0;
-
-ImmutablePass *
-llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) {
- return new AArch64TTI(TM);
-}
-
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
-unsigned AArch64TTI::getIntImmCost(int64_t Val) const {
+unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) {
// Check if the immediate can be encoded within an instruction.
if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
return 0;
@@ -163,7 +37,7 @@ unsigned AArch64TTI::getIntImmCost(int64_t Val) const {
}
/// \brief Calculate the cost of materializing the given constant.
-unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -187,25 +61,25 @@ unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
return std::max(1U, Cost);
}
-unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
+unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
// There is no cost model for constants with a bit size of 0. Return TCC_Free
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
unsigned ImmIdx = ~0U;
switch (Opcode) {
default:
- return TCC_Free;
+ return TTI::TCC_Free;
case Instruction::GetElementPtr:
// Always hoist the base address of a GetElementPtr.
if (Idx == 0)
- return 2 * TCC_Basic;
- return TCC_Free;
+ return 2 * TTI::TCC_Basic;
+ return TTI::TCC_Free;
case Instruction::Store:
ImmIdx = 0;
break;
@@ -227,7 +101,7 @@ unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
case Instruction::LShr:
case Instruction::AShr:
if (Idx == 1)
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Instruction::Trunc:
case Instruction::ZExt:
@@ -245,26 +119,27 @@ unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
if (Idx == ImmIdx) {
unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
- return (Cost <= NumConstants * TCC_Basic)
- ? static_cast<unsigned>(TCC_Free) : Cost;
+ unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TTI::TCC_Basic)
+ ? static_cast<unsigned>(TTI::TCC_Free)
+ : Cost;
}
- return AArch64TTI::getIntImmCost(Imm, Ty);
+ return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
-unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
+unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
// There is no cost model for constants with a bit size of 0. Return TCC_Free
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
switch (IID) {
default:
- return TCC_Free;
+ return TTI::TCC_Free;
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
@@ -273,35 +148,36 @@ unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
case Intrinsic::umul_with_overflow:
if (Idx == 1) {
unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
- return (Cost <= NumConstants * TCC_Basic)
- ? static_cast<unsigned>(TCC_Free) : Cost;
+ unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TTI::TCC_Basic)
+ ? static_cast<unsigned>(TTI::TCC_Free)
+ : Cost;
}
break;
case Intrinsic::experimental_stackmap:
if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
}
- return AArch64TTI::getIntImmCost(Imm, Ty);
+ return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
-AArch64TTI::PopcntSupportKind
-AArch64TTI::getPopcntSupport(unsigned TyWidth) const {
+TargetTransformInfo::PopcntSupportKind
+AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
if (TyWidth == 32 || TyWidth == 64)
- return PSK_FastHardware;
+ return TTI::PSK_FastHardware;
// TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
- return PSK_Software;
+ return TTI::PSK_Software;
}
-unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
+unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -309,7 +185,7 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
EVT DstTy = TLI->getValueType(Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
// LowerVectorINT_TO_FP:
@@ -380,11 +256,11 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
if (Idx != -1)
return ConversionTbl[Idx].Cost;
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
+unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
if (Index != -1U) {
@@ -408,10 +284,10 @@ unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
return 2;
}
-unsigned AArch64TTI::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
- OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) const {
+unsigned AArch64TTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+ TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
@@ -442,8 +318,8 @@ unsigned AArch64TTI::getArithmeticInstrCost(
switch (ISD) {
default:
- return TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
case ISD::ADD:
case ISD::MUL:
case ISD::XOR:
@@ -455,7 +331,7 @@ unsigned AArch64TTI::getArithmeticInstrCost(
}
}
-unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -470,14 +346,14 @@ unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
return 1;
}
-unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
+unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// We don't lower vector selects well that are wider than the register width.
if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
// We would need this many instructions to hide the scalarization happening.
- unsigned AmortizationCost = 20;
+ const unsigned AmortizationCost = 20;
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
VectorSelectTbl[] = {
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
@@ -498,12 +374,12 @@ unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return VectorSelectTbl[Idx].Cost;
}
}
- return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
+unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
@@ -531,7 +407,7 @@ unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
-unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
+unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
unsigned Cost = 0;
for (auto *I : Tys) {
if (!I->isVectorTy())
@@ -543,14 +419,103 @@ unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
return Cost;
}
-unsigned AArch64TTI::getMaxInterleaveFactor() const {
+unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
if (ST->isCortexA57())
return 4;
return 2;
}
-void AArch64TTI::getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const {
+void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
+ // Enable partial unrolling and runtime unrolling.
+ BaseT::getUnrollingPreferences(L, UP);
+
+ // For inner loop, it is more likely to be a hot one, and the runtime check
+ // can be promoted out from LICM pass, so the overhead is less, let's try
+ // a larger threshold to unroll more loops.
+ if (L->getLoopDepth() > 1)
+ UP.PartialThreshold *= 2;
+
// Disable partial & runtime unrolling on -Os.
UP.PartialOptSizeThreshold = 0;
}
+
+Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
+                                                         Type *ExpectedType) {
+  // A structured load is reusable as-is when it already has ExpectedType; a
+  // structured store needs its stored operands repackaged (handled below).
+  switch (Inst->getIntrinsicID()) {
+  case Intrinsic::aarch64_neon_ld2:
+  case Intrinsic::aarch64_neon_ld3:
+  case Intrinsic::aarch64_neon_ld4:
+    return Inst->getType() == ExpectedType ? Inst : nullptr;
+  case Intrinsic::aarch64_neon_st2:
+  case Intrinsic::aarch64_neon_st3:
+  case Intrinsic::aarch64_neon_st4:
+    break;
+  default:
+    return nullptr;
+  }
+
+  // ExpectedType must be a struct with one field per stored value; the last
+  // call argument is excluded from the count.
+  auto *StructTy = dyn_cast<StructType>(ExpectedType);
+  const unsigned NumValues = Inst->getNumArgOperands() - 1;
+  if (!StructTy || StructTy->getNumElements() != NumValues)
+    return nullptr;
+  // Validate every field type first so nothing is emitted on a mismatch.
+  for (unsigned Idx = 0; Idx != NumValues; ++Idx)
+    if (Inst->getArgOperand(Idx)->getType() != StructTy->getElementType(Idx))
+      return nullptr;
+  // Fold the stored operands into an initially-undef aggregate.
+  Value *Aggregate = UndefValue::get(ExpectedType);
+  IRBuilder<> Builder(Inst);
+  for (unsigned Idx = 0; Idx != NumValues; ++Idx)
+    Aggregate =
+        Builder.CreateInsertValue(Aggregate, Inst->getArgOperand(Idx), Idx);
+  return Aggregate;
+}
+
+bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
+                                        MemIntrinsicInfo &Info) {
+  // Classify the intrinsic first: load or store, and which element-count tag
+  // applies. Any other intrinsic returns false with Info left untouched.
+  bool IsLoad;
+  MemIntrinsicType Width;
+  switch (Inst->getIntrinsicID()) {
+  case Intrinsic::aarch64_neon_ld2:
+    IsLoad = true;
+    Width = VECTOR_LDST_TWO_ELEMENTS;
+    break;
+  case Intrinsic::aarch64_neon_st2:
+    IsLoad = false;
+    Width = VECTOR_LDST_TWO_ELEMENTS;
+    break;
+  case Intrinsic::aarch64_neon_ld3:
+    IsLoad = true;
+    Width = VECTOR_LDST_THREE_ELEMENTS;
+    break;
+  case Intrinsic::aarch64_neon_st3:
+    IsLoad = false;
+    Width = VECTOR_LDST_THREE_ELEMENTS;
+    break;
+  case Intrinsic::aarch64_neon_ld4:
+    IsLoad = true;
+    Width = VECTOR_LDST_FOUR_ELEMENTS;
+    break;
+  case Intrinsic::aarch64_neon_st4:
+    IsLoad = false;
+    Width = VECTOR_LDST_FOUR_ELEMENTS;
+    break;
+  default:
+    return false;
+  }
+  // Loads take the pointer as their first argument, stores as their last.
+  Info.ReadMem = IsLoad;
+  Info.WriteMem = !IsLoad;
+  Info.Vol = false;
+  Info.NumMemRefs = 1;
+  Info.PtrVal = Inst->getArgOperand(IsLoad ? 0 : Inst->getNumArgOperands() - 1);
+  Info.MatchingId = Width;
+  return true;
+}
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
new file mode 100644
index 0000000..25c22bc
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -0,0 +1,147 @@
+//===-- AArch64TargetTransformInfo.h - AArch64 specific TTI -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides a TargetTransformInfo::Concept conforming object specific
+/// to the AArch64 target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+
+namespace llvm {
+/// AArch64 implementation of the TTI hooks, built on BasicTTIImplBase (CRTP).
+class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
+ typedef BasicTTIImplBase<AArch64TTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT; // Grants the CRTP base access to the private members below.
+ // Non-owning pointers to target state; set by the constructors only.
+ const AArch64TargetMachine *TM;
+ const AArch64Subtarget *ST;
+ const AArch64TargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
+ // Accessors for the cached subtarget and target-lowering pointers.
+ const AArch64Subtarget *getST() const { return ST; }
+ const AArch64TargetLowering *getTLI() const { return TLI; }
+ /// Tags that getTgtMemIntrinsic stores in MemIntrinsicInfo::MatchingId.
+ enum MemIntrinsicType {
+ VECTOR_LDST_TWO_ELEMENTS,
+ VECTOR_LDST_THREE_ELEMENTS,
+ VECTOR_LDST_FOUR_ELEMENTS
+ };
+
+public:
+ explicit AArch64TTIImpl(const AArch64TargetMachine *TM, Function &F)
+ : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {} // Reads member ST, initialized before TLI.
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ AArch64TTIImpl(const AArch64TTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST),
+ TLI(Arg.TLI) {}
+ AArch64TTIImpl(AArch64TTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)),
+ ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {}
+ AArch64TTIImpl &operator=(const AArch64TTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ TM = RHS.TM;
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ AArch64TTIImpl &operator=(AArch64TTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ TM = std::move(RHS.TM);
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ using BaseT::getIntImmCost; // Keep the base-class overloads visible too.
+ unsigned getIntImmCost(int64_t Val); // Cost of one 64-bit value.
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+ /// Register count: 32 vector registers with NEON (else 0), 31 otherwise.
+ unsigned getNumberOfRegisters(bool Vector) {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 32;
+ return 0;
+ }
+ return 31;
+ }
+ /// Register width in bits: 128 for vectors with NEON (else 0), 64 otherwise.
+ unsigned getRegisterBitWidth(bool Vector) {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 128;
+ return 0;
+ }
+ return 64;
+ }
+ /// Returns 4 on Cortex-A57 and 2 otherwise; VF is not consulted.
+ unsigned getMaxInterleaveFactor(unsigned VF);
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+
+ unsigned getAddressComputationCost(Type *Ty, bool IsComplex);
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+
+ unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
+ /// Starts from the base preferences, then adjusts the partial thresholds.
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ /// Yields a value of ExpectedType for NEON ld2-4/st2-4, or null if it cannot.
+ Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
+ Type *ExpectedType);
+ /// Fills Info for NEON ld2-4/st2-4; returns false for any other intrinsic.
+ bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 8eb906b..38d34e6 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64TargetStreamer.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
@@ -113,11 +114,10 @@ public:
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "AArch64GenAsmMatcher.inc"
};
- AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI) {
- MCAsmParserExtension::Initialize(_Parser);
+ AArch64AsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI) {
+ MCAsmParserExtension::Initialize(Parser);
MCStreamer &S = getParser().getStreamer();
if (S.getTargetStreamer() == nullptr)
new AArch64TargetStreamer(S);
@@ -205,14 +205,16 @@ private:
struct BarrierOp {
unsigned Val; // Not the enum since not all values have names.
+ const char *Data;
+ unsigned Length;
};
struct SysRegOp {
const char *Data;
unsigned Length;
- uint64_t FeatureBits; // We need to pass through information about which
- // core we are compiling for so that the SysReg
- // Mappers can appropriately conditionalize.
+ uint32_t MRSReg;
+ uint32_t MSRReg;
+ uint32_t PStateField;
};
struct SysCRImmOp {
@@ -221,6 +223,8 @@ private:
struct PrefetchOp {
unsigned Val;
+ const char *Data;
+ unsigned Length;
};
struct ShiftExtendOp {
@@ -254,8 +258,7 @@ private:
MCContext &Ctx;
public:
- AArch64Operand(KindTy K, MCContext &_Ctx)
- : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}
+ AArch64Operand(KindTy K, MCContext &Ctx) : Kind(K), Ctx(Ctx) {}
AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
Kind = o.Kind;
@@ -349,6 +352,11 @@ public:
return Barrier.Val;
}
+ StringRef getBarrierName() const {
+   assert(Kind == k_Barrier && "Invalid access!");
+   return {Barrier.Data, Barrier.Length}; // View over the stored name text.
+ }
+
unsigned getReg() const override {
assert(Kind == k_Register && "Invalid access!");
return Reg.RegNum;
@@ -374,11 +382,6 @@ public:
return StringRef(SysReg.Data, SysReg.Length);
}
- uint64_t getSysRegFeatureBits() const {
- assert(Kind == k_SysReg && "Invalid access!");
- return SysReg.FeatureBits;
- }
-
unsigned getSysCR() const {
assert(Kind == k_SysCR && "Invalid access!");
return SysCRImm.Val;
@@ -389,6 +392,11 @@ public:
return Prefetch.Val;
}
+ StringRef getPrefetchName() const {
+   assert(Kind == k_Prefetch && "Invalid access!");
+   return {Prefetch.Data, Prefetch.Length}; // View over the stored name text.
+ }
+
AArch64_AM::ShiftExtendType getShiftExtendType() const {
assert(Kind == k_ShiftExtend && "Invalid access!");
return ShiftExtend.Type;
@@ -757,58 +765,47 @@ public:
}
bool isMovZSymbolG3() const {
- static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G3);
}
bool isMovZSymbolG2() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
- AArch64MCExpr::VK_TPREL_G2, AArch64MCExpr::VK_DTPREL_G2};
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
+ AArch64MCExpr::VK_TPREL_G2,
+ AArch64MCExpr::VK_DTPREL_G2});
}
bool isMovZSymbolG1() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
+ return isMovWSymbol({
+ AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1,
AArch64MCExpr::VK_DTPREL_G1,
- };
- return isMovWSymbol(Variants);
+ });
}
bool isMovZSymbolG0() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
- AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_DTPREL_G0};
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
+ AArch64MCExpr::VK_TPREL_G0,
+ AArch64MCExpr::VK_DTPREL_G0});
}
bool isMovKSymbolG3() const {
- static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G3);
}
bool isMovKSymbolG2() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G2_NC};
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G2_NC);
}
bool isMovKSymbolG1() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_TPREL_G1_NC,
- AArch64MCExpr::VK_DTPREL_G1_NC
- };
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G1_NC,
+ AArch64MCExpr::VK_TPREL_G1_NC,
+ AArch64MCExpr::VK_DTPREL_G1_NC});
}
bool isMovKSymbolG0() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
- AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC
- };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(
+ {AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC});
}
template<int RegWidth, int Shift>
@@ -855,28 +852,17 @@ public:
bool isMRSSystemRegister() const {
if (!isSysReg()) return false;
- bool IsKnownRegister;
- auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits());
- Mapper.fromString(getSysReg(), IsKnownRegister);
-
- return IsKnownRegister;
+ return SysReg.MRSReg != -1U;
}
bool isMSRSystemRegister() const {
if (!isSysReg()) return false;
- bool IsKnownRegister;
- auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits());
- Mapper.fromString(getSysReg(), IsKnownRegister);
-
- return IsKnownRegister;
+ return SysReg.MSRReg != -1U;
}
bool isSystemPStateField() const {
if (!isSysReg()) return false;
- bool IsKnownRegister;
- AArch64PState::PStateMapper().fromString(getSysReg(), IsKnownRegister);
-
- return IsKnownRegister;
+ return SysReg.PStateField != -1U;
}
bool isReg() const override { return Kind == k_Register && !Reg.isVector; }
bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
@@ -1113,16 +1099,16 @@ public:
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
if (!Expr)
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
else
- Inst.addOperand(MCOperand::CreateExpr(Expr));
+ Inst.addOperand(MCOperand::createExpr(Expr));
}
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(getReg()));
}
void addGPR32as64Operands(MCInst &Inst, unsigned N) const {
@@ -1134,26 +1120,26 @@ public:
uint32_t Reg = RI->getRegClass(AArch64::GPR32RegClassID).getRegister(
RI->getEncodingValue(getReg()));
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
}
void addVectorReg64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
assert(
AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg()));
- Inst.addOperand(MCOperand::CreateReg(AArch64::D0 + getReg() - AArch64::Q0));
+ Inst.addOperand(MCOperand::createReg(AArch64::D0 + getReg() - AArch64::Q0));
}
void addVectorReg128Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
assert(
AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg()));
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(getReg()));
}
void addVectorRegLoOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(getReg()));
}
template <unsigned NumRegs>
@@ -1164,7 +1150,7 @@ public:
unsigned FirstReg = FirstRegs[NumRegs - 1];
Inst.addOperand(
- MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0));
+ MCOperand::createReg(FirstReg + getVectorListStart() - AArch64::Q0));
}
template <unsigned NumRegs>
@@ -1175,32 +1161,32 @@ public:
unsigned FirstReg = FirstRegs[NumRegs - 1];
Inst.addOperand(
- MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0));
+ MCOperand::createReg(FirstReg + getVectorListStart() - AArch64::Q0));
}
void addVectorIndex1Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addVectorIndexBOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addVectorIndexHOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addVectorIndexSOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addVectorIndexDOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addImmOperands(MCInst &Inst, unsigned N) const {
@@ -1215,16 +1201,16 @@ public:
assert(N == 2 && "Invalid number of operands!");
if (isShiftedImm()) {
addExpr(Inst, getShiftedImmVal());
- Inst.addOperand(MCOperand::CreateImm(getShiftedImmShift()));
+ Inst.addOperand(MCOperand::createImm(getShiftedImmShift()));
} else {
addExpr(Inst, getImm());
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
}
}
void addCondCodeOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getCondCode()));
+ Inst.addOperand(MCOperand::createImm(getCondCode()));
}
void addAdrpLabelOperands(MCInst &Inst, unsigned N) const {
@@ -1233,7 +1219,7 @@ public:
if (!MCE)
addExpr(Inst, getImm());
else
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 12));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 12));
}
void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
@@ -1246,119 +1232,119 @@ public:
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE) {
- Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::createExpr(getImm()));
return;
}
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / Scale));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() / Scale));
}
void addSImm9Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addSImm7s4Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 4));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() / 4));
}
void addSImm7s8Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 8));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() / 8));
}
void addSImm7s16Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 16));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() / 16));
}
void addImm0_7Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm1_8Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm0_15Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm1_16Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm0_31Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm1_31Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm1_32Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm0_63Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm1_63Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm1_64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm0_127Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm0_255Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm32_63Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addLogicalImm32Operands(MCInst &Inst, unsigned N) const {
@@ -1366,14 +1352,14 @@ public:
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
uint64_t encoding =
AArch64_AM::encodeLogicalImmediate(MCE->getValue() & 0xFFFFFFFF, 32);
- Inst.addOperand(MCOperand::CreateImm(encoding));
+ Inst.addOperand(MCOperand::createImm(encoding));
}
void addLogicalImm64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 64);
- Inst.addOperand(MCOperand::CreateImm(encoding));
+ Inst.addOperand(MCOperand::createImm(encoding));
}
void addLogicalImm32NotOperands(MCInst &Inst, unsigned N) const {
@@ -1381,7 +1367,7 @@ public:
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
uint64_t encoding = AArch64_AM::encodeLogicalImmediate(Val, 32);
- Inst.addOperand(MCOperand::CreateImm(encoding));
+ Inst.addOperand(MCOperand::createImm(encoding));
}
void addLogicalImm64NotOperands(MCInst &Inst, unsigned N) const {
@@ -1389,14 +1375,14 @@ public:
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
uint64_t encoding =
AArch64_AM::encodeLogicalImmediate(~MCE->getValue(), 64);
- Inst.addOperand(MCOperand::CreateImm(encoding));
+ Inst.addOperand(MCOperand::createImm(encoding));
}
void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
uint64_t encoding = AArch64_AM::encodeAdvSIMDModImmType10(MCE->getValue());
- Inst.addOperand(MCOperand::CreateImm(encoding));
+ Inst.addOperand(MCOperand::createImm(encoding));
}
void addBranchTarget26Operands(MCInst &Inst, unsigned N) const {
@@ -1410,7 +1396,7 @@ public:
return;
}
assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2));
}
void addPCRelLabel19Operands(MCInst &Inst, unsigned N) const {
@@ -1424,7 +1410,7 @@ public:
return;
}
assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2));
}
void addBranchTarget14Operands(MCInst &Inst, unsigned N) const {
@@ -1438,64 +1424,52 @@ public:
return;
}
assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2));
}
void addFPImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getFPImm()));
+ Inst.addOperand(MCOperand::createImm(getFPImm()));
}
void addBarrierOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getBarrier()));
+ Inst.addOperand(MCOperand::createImm(getBarrier()));
}
void addMRSSystemRegisterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- bool Valid;
- auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits());
- uint32_t Bits = Mapper.fromString(getSysReg(), Valid);
-
- Inst.addOperand(MCOperand::CreateImm(Bits));
+ Inst.addOperand(MCOperand::createImm(SysReg.MRSReg));
}
void addMSRSystemRegisterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- bool Valid;
- auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits());
- uint32_t Bits = Mapper.fromString(getSysReg(), Valid);
-
- Inst.addOperand(MCOperand::CreateImm(Bits));
+ Inst.addOperand(MCOperand::createImm(SysReg.MSRReg));
}
void addSystemPStateFieldOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- bool Valid;
- uint32_t Bits =
- AArch64PState::PStateMapper().fromString(getSysReg(), Valid);
-
- Inst.addOperand(MCOperand::CreateImm(Bits));
+ Inst.addOperand(MCOperand::createImm(SysReg.PStateField));
}
void addSysCROperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getSysCR()));
+ Inst.addOperand(MCOperand::createImm(getSysCR()));
}
void addPrefetchOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getPrefetch()));
+ Inst.addOperand(MCOperand::createImm(getPrefetch()));
}
void addShifterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
unsigned Imm =
AArch64_AM::getShifterImm(getShiftExtendType(), getShiftExtendAmount());
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
}
void addExtendOperands(MCInst &Inst, unsigned N) const {
@@ -1503,7 +1477,7 @@ public:
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTW;
unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount());
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
}
void addExtend64Operands(MCInst &Inst, unsigned N) const {
@@ -1511,15 +1485,15 @@ public:
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTX;
unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount());
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
}
void addMemExtendOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX;
- Inst.addOperand(MCOperand::CreateImm(IsSigned));
- Inst.addOperand(MCOperand::CreateImm(getShiftExtendAmount() != 0));
+ Inst.addOperand(MCOperand::createImm(IsSigned));
+ Inst.addOperand(MCOperand::createImm(getShiftExtendAmount() != 0));
}
// For 8-bit load/store instructions with a register offset, both the
@@ -1530,8 +1504,8 @@ public:
assert(N == 2 && "Invalid number of operands!");
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX;
- Inst.addOperand(MCOperand::CreateImm(IsSigned));
- Inst.addOperand(MCOperand::CreateImm(hasShiftExtendAmount()));
+ Inst.addOperand(MCOperand::createImm(IsSigned));
+ Inst.addOperand(MCOperand::createImm(hasShiftExtendAmount()));
}
template<int Shift>
@@ -1540,7 +1514,7 @@ public:
const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
uint64_t Value = CE->getValue();
- Inst.addOperand(MCOperand::CreateImm((Value >> Shift) & 0xffff));
+ Inst.addOperand(MCOperand::createImm((Value >> Shift) & 0xffff));
}
template<int Shift>
@@ -1549,7 +1523,7 @@ public:
const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
uint64_t Value = CE->getValue();
- Inst.addOperand(MCOperand::CreateImm((~Value >> Shift) & 0xffff));
+ Inst.addOperand(MCOperand::createImm((~Value >> Shift) & 0xffff));
}
void print(raw_ostream &OS) const override;
@@ -1636,21 +1610,30 @@ public:
return Op;
}
- static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S,
+ static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val,
+ StringRef Str,
+ SMLoc S,
MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx);
Op->Barrier.Val = Val;
+ Op->Barrier.Data = Str.data();
+ Op->Barrier.Length = Str.size();
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static std::unique_ptr<AArch64Operand>
- CreateSysReg(StringRef Str, SMLoc S, uint64_t FeatureBits, MCContext &Ctx) {
+ static std::unique_ptr<AArch64Operand> CreateSysReg(StringRef Str, SMLoc S,
+ uint32_t MRSReg,
+ uint32_t MSRReg,
+ uint32_t PStateField,
+ MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_SysReg, Ctx);
Op->SysReg.Data = Str.data();
Op->SysReg.Length = Str.size();
- Op->SysReg.FeatureBits = FeatureBits;
+ Op->SysReg.MRSReg = MRSReg;
+ Op->SysReg.MSRReg = MSRReg;
+ Op->SysReg.PStateField = PStateField;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -1665,10 +1648,14 @@ public:
return Op;
}
- static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S,
+ static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val,
+ StringRef Str,
+ SMLoc S,
MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx);
Op->Prefetch.Val = Val;
+ Op->Barrier.Data = Str.data();
+ Op->Barrier.Length = Str.size();
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -1696,21 +1683,20 @@ void AArch64Operand::print(raw_ostream &OS) const {
<< AArch64_AM::getFPImmFloat(getFPImm()) << ") >";
break;
case k_Barrier: {
- bool Valid;
- StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid);
- if (Valid)
+ StringRef Name = getBarrierName();
+ if (!Name.empty())
OS << "<barrier " << Name << ">";
else
OS << "<barrier invalid #" << getBarrier() << ">";
break;
}
case k_Immediate:
- getImm()->print(OS);
+ OS << *getImm();
break;
case k_ShiftedImm: {
unsigned Shift = getShiftedImmShift();
OS << "<shiftedimm ";
- getShiftedImmVal()->print(OS);
+ OS << *getShiftedImmVal();
OS << ", lsl #" << AArch64_AM::getShiftValue(Shift) << ">";
break;
}
@@ -1741,9 +1727,8 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << "c" << getSysCR();
break;
case k_Prefetch: {
- bool Valid;
- StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid);
- if (Valid)
+ StringRef Name = getPrefetchName();
+ if (!Name.empty())
OS << "<prfop " << Name << ">";
else
OS << "<prfop invalid #" << getPrefetch() << ">";
@@ -1986,7 +1971,12 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext()));
+ bool Valid;
+ auto Mapper = AArch64PRFM::PRFMMapper();
+ StringRef Name =
+ Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid);
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name,
+ S, getContext()));
return MatchOperand_Success;
}
@@ -1996,14 +1986,17 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
}
bool Valid;
- unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid);
+ auto Mapper = AArch64PRFM::PRFMMapper();
+ unsigned prfop =
+ Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid);
if (!Valid) {
TokError("pre-fetch hint expected");
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat identifier token.
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext()));
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Tok.getString(),
+ S, getContext()));
return MatchOperand_Success;
}
@@ -2100,15 +2093,16 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
if (Tok.is(AsmToken::Real)) {
APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ if (isNegative)
+ RealVal.changeSign();
+
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
- // If we had a '-' in front, toggle the sign bit.
- IntVal ^= (uint64_t)isNegative << 63;
int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal));
Parser.Lex(); // Eat the token.
// Check for out of range values. As an exception, we let Zero through,
// as we handle that special case in post-processing before matching in
// order to use the zero register for it.
- if (Val == -1 && !RealVal.isZero()) {
+ if (Val == -1 && !RealVal.isPosZero()) {
TokError("expected compatible register or floating-point constant");
return MatchOperand_ParseFail;
}
@@ -2605,8 +2599,12 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
Error(ExprLoc, "barrier operand out of range");
return MatchOperand_ParseFail;
}
- Operands.push_back(
- AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext()));
+ bool Valid;
+ auto Mapper = AArch64DB::DBarrierMapper();
+ StringRef Name =
+ Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid);
+ Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name,
+ ExprLoc, getContext()));
return MatchOperand_Success;
}
@@ -2616,7 +2614,9 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
}
bool Valid;
- unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid);
+ auto Mapper = AArch64DB::DBarrierMapper();
+ unsigned Opt =
+ Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid);
if (!Valid) {
TokError("invalid barrier option name");
return MatchOperand_ParseFail;
@@ -2628,8 +2628,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Operands.push_back(
- AArch64Operand::CreateBarrier(Opt, getLoc(), getContext()));
+ Operands.push_back( AArch64Operand::CreateBarrier(Opt, Tok.getString(),
+ getLoc(), getContext()));
Parser.Lex(); // Consume the option
return MatchOperand_Success;
@@ -2643,8 +2643,27 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
- Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), getLoc(),
- STI.getFeatureBits(), getContext()));
+ bool IsKnown;
+ auto MRSMapper = AArch64SysReg::MRSMapper();
+ uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), STI.getFeatureBits(),
+ IsKnown);
+ assert(IsKnown == (MRSReg != -1U) &&
+ "register should be -1 if and only if it's unknown");
+
+ auto MSRMapper = AArch64SysReg::MSRMapper();
+ uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), STI.getFeatureBits(),
+ IsKnown);
+ assert(IsKnown == (MSRReg != -1U) &&
+ "register should be -1 if and only if it's unknown");
+
+ auto PStateMapper = AArch64PState::PStateMapper();
+ uint32_t PStateField =
+ PStateMapper.fromString(Tok.getString(), STI.getFeatureBits(), IsKnown);
+ assert(IsKnown == (PStateField != -1U) &&
+ "register should be -1 if and only if it's unknown");
+
+ Operands.push_back(AArch64Operand::CreateSysReg(
+ Tok.getString(), getLoc(), MRSReg, MSRReg, PStateField, getContext()));
Parser.Lex(); // Eat identifier
return MatchOperand_Success;
@@ -3626,6 +3645,60 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Op3.getEndLoc(), getContext());
}
}
+ } else if (NumOperands == 4 && Tok == "bfc") {
+ // FIXME: Horrible hack to handle BFC->BFM alias.
+ AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand LSBOp = static_cast<AArch64Operand &>(*Operands[2]);
+ AArch64Operand WidthOp = static_cast<AArch64Operand &>(*Operands[3]);
+
+ if (Op1.isReg() && LSBOp.isImm() && WidthOp.isImm()) {
+ const MCConstantExpr *LSBCE = dyn_cast<MCConstantExpr>(LSBOp.getImm());
+ const MCConstantExpr *WidthCE = dyn_cast<MCConstantExpr>(WidthOp.getImm());
+
+ if (LSBCE && WidthCE) {
+ uint64_t LSB = LSBCE->getValue();
+ uint64_t Width = WidthCE->getValue();
+
+ uint64_t RegWidth = 0;
+ if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
+ Op1.getReg()))
+ RegWidth = 64;
+ else
+ RegWidth = 32;
+
+ if (LSB >= RegWidth)
+ return Error(LSBOp.getStartLoc(),
+ "expected integer in range [0, 31]");
+ if (Width < 1 || Width > RegWidth)
+ return Error(WidthOp.getStartLoc(),
+ "expected integer in range [1, 32]");
+
+ uint64_t ImmR = 0;
+ if (RegWidth == 32)
+ ImmR = (32 - LSB) & 0x1f;
+ else
+ ImmR = (64 - LSB) & 0x3f;
+
+ uint64_t ImmS = Width - 1;
+
+ if (ImmR != 0 && ImmS >= ImmR)
+ return Error(WidthOp.getStartLoc(),
+ "requested insert overflows register");
+
+ const MCExpr *ImmRExpr = MCConstantExpr::Create(ImmR, getContext());
+ const MCExpr *ImmSExpr = MCConstantExpr::Create(ImmS, getContext());
+ Operands[0] = AArch64Operand::CreateToken(
+ "bfm", false, Op.getStartLoc(), getContext());
+ Operands[2] = AArch64Operand::CreateReg(
+ RegWidth == 32 ? AArch64::WZR : AArch64::XZR, false, SMLoc(),
+ SMLoc(), getContext());
+ Operands[3] = AArch64Operand::CreateImm(
+ ImmRExpr, LSBOp.getStartLoc(), LSBOp.getEndLoc(), getContext());
+ Operands.emplace_back(
+ AArch64Operand::CreateImm(ImmSExpr, WidthOp.getStartLoc(),
+ WidthOp.getEndLoc(), getContext()));
+ }
+ }
} else if (NumOperands == 5) {
// FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and
// UBFIZ -> UBFM aliases.
@@ -3657,8 +3730,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
"expected integer in range [1, 32]");
uint64_t NewOp3Val = 0;
- if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains(
- Op1.getReg()))
+ if (RegWidth == 32)
NewOp3Val = (32 - Op3Val) & 0x1f;
else
NewOp3Val = (64 - Op3Val) & 0x3f;
@@ -4033,13 +4105,13 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
if (getParser().parseIdentifier(Name))
return Error(L, "expected symbol after directive");
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
Expr = AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_TLSDESC, getContext());
MCInst Inst;
Inst.setOpcode(AArch64::TLSDESCCALL);
- Inst.addOperand(MCOperand::CreateExpr(Expr));
+ Inst.addOperand(MCOperand::createExpr(Expr));
getParser().getStreamer().EmitInstruction(Inst, STI);
return false;
@@ -4082,7 +4154,7 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
StringRef Name;
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
- Args.push_back(getContext().GetOrCreateSymbol(Name));
+ Args.push_back(getContext().getOrCreateSymbol(Name));
if (Idx + 1 == NbArgs)
break;
@@ -4139,7 +4211,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
Parser.Lex(); // Consume the EndOfStatement
auto pair = std::make_pair(IsVector, RegNum);
- if (!RegisterReqs.insert(std::make_pair(Name, pair)).second)
+ if (RegisterReqs.insert(std::make_pair(Name, pair)).first->second != pair)
Warning(L, "ignoring redefinition of register alias '" + Name + "'");
return true;
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 878e29c..a1ed703 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -221,13 +221,11 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
static MCSymbolizer *
createAArch64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo,
- LLVMSymbolLookupCallback SymbolLookUp,
- void *DisInfo, MCContext *Ctx,
- MCRelocationInfo *RelInfo) {
- return new llvm::AArch64ExternalSymbolizer(
- *Ctx,
- std::unique_ptr<MCRelocationInfo>(RelInfo),
- GetOpInfo, SymbolLookUp, DisInfo);
+ LLVMSymbolLookupCallback SymbolLookUp,
+ void *DisInfo, MCContext *Ctx,
+ std::unique_ptr<MCRelocationInfo> &&RelInfo) {
+ return new llvm::AArch64ExternalSymbolizer(*Ctx, move(RelInfo), GetOpInfo,
+ SymbolLookUp, DisInfo);
}
extern "C" void LLVMInitializeAArch64Disassembler() {
@@ -263,7 +261,7 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = FPR128DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -292,7 +290,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = FPR64DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -313,7 +311,7 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = FPR32DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -334,7 +332,7 @@ static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = FPR16DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -355,7 +353,7 @@ static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = FPR8DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -376,7 +374,7 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = GPR64DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -388,7 +386,7 @@ static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
unsigned Register = GPR64DecoderTable[RegNo];
if (Register == AArch64::XZR)
Register = AArch64::SP;
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -409,7 +407,7 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = GPR32DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -422,7 +420,7 @@ static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo,
unsigned Register = GPR32DecoderTable[RegNo];
if (Register == AArch64::WZR)
Register = AArch64::WSP;
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -443,7 +441,7 @@ static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = VectorDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -463,7 +461,7 @@ static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
unsigned Register = QQDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -486,7 +484,7 @@ static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
unsigned Register = QQQDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -510,7 +508,7 @@ static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
unsigned Register = QQQQDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -530,7 +528,7 @@ static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
unsigned Register = DDDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -553,7 +551,7 @@ static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
unsigned Register = DDDDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -577,7 +575,7 @@ static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
unsigned Register = DDDDDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -586,14 +584,14 @@ static DecodeStatus DecodeFixedPointScaleImm32(llvm::MCInst &Inst, unsigned Imm,
const void *Decoder) {
// scale{5} is asserted as 1 in tblgen.
Imm |= 0x20;
- Inst.addOperand(MCOperand::CreateImm(64 - Imm));
+ Inst.addOperand(MCOperand::createImm(64 - Imm));
return Success;
}
static DecodeStatus DecodeFixedPointScaleImm64(llvm::MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(64 - Imm));
+ Inst.addOperand(MCOperand::createImm(64 - Imm));
return Success;
}
@@ -609,21 +607,21 @@ static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm,
if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal * 4, Addr,
Inst.getOpcode() != AArch64::LDRXl, 0, 4))
- Inst.addOperand(MCOperand::CreateImm(ImmVal));
+ Inst.addOperand(MCOperand::createImm(ImmVal));
return Success;
}
static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm((Imm >> 1) & 1));
- Inst.addOperand(MCOperand::CreateImm(Imm & 1));
+ Inst.addOperand(MCOperand::createImm((Imm >> 1) & 1));
+ Inst.addOperand(MCOperand::createImm(Imm & 1));
return Success;
}
static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
// Every system register in the encoding space is valid with the syntax
// S<op0>_<op1>_<Cn>_<Cm>_<op2>, so decoding system registers always succeeds.
@@ -633,7 +631,7 @@ static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
return Success;
}
@@ -656,20 +654,20 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
}
// Add the lane
- Inst.addOperand(MCOperand::CreateImm(1));
+ Inst.addOperand(MCOperand::createImm(1));
return Success;
}
static DecodeStatus DecodeVecShiftRImm(llvm::MCInst &Inst, unsigned Imm,
unsigned Add) {
- Inst.addOperand(MCOperand::CreateImm(Add - Imm));
+ Inst.addOperand(MCOperand::createImm(Add - Imm));
return Success;
}
static DecodeStatus DecodeVecShiftLImm(llvm::MCInst &Inst, unsigned Imm,
unsigned Add) {
- Inst.addOperand(MCOperand::CreateImm((Imm + Add) & (Add - 1)));
+ Inst.addOperand(MCOperand::createImm((Imm + Add) & (Add - 1)));
return Success;
}
@@ -789,7 +787,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
break;
}
- Inst.addOperand(MCOperand::CreateImm(shift));
+ Inst.addOperand(MCOperand::createImm(shift));
return Success;
}
@@ -821,8 +819,8 @@ static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn,
Inst.getOpcode() == AArch64::MOVKXi)
Inst.addOperand(Inst.getOperand(0));
- Inst.addOperand(MCOperand::CreateImm(imm));
- Inst.addOperand(MCOperand::CreateImm(shift));
+ Inst.addOperand(MCOperand::createImm(imm));
+ Inst.addOperand(MCOperand::createImm(shift));
return Success;
}
@@ -840,7 +838,7 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
return Fail;
case AArch64::PRFMui:
// Rt is an immediate in prefetch.
- Inst.addOperand(MCOperand::CreateImm(Rt));
+ Inst.addOperand(MCOperand::createImm(Rt));
break;
case AArch64::STRBBui:
case AArch64::LDRBBui:
@@ -883,7 +881,7 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
if (!Dis->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4))
- Inst.addOperand(MCOperand::CreateImm(offset));
+ Inst.addOperand(MCOperand::createImm(offset));
return Success;
}
@@ -958,7 +956,7 @@ static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
return Fail;
case AArch64::PRFUMi:
// Rt is an immediate in prefetch.
- Inst.addOperand(MCOperand::CreateImm(Rt));
+ Inst.addOperand(MCOperand::createImm(Rt));
break;
case AArch64::STURBBi:
case AArch64::LDURBBi:
@@ -1059,7 +1057,7 @@ static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
}
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(offset));
+ Inst.addOperand(MCOperand::createImm(offset));
bool IsLoad = fieldFromInstruction(insn, 22, 1);
bool IsIndexed = fieldFromInstruction(insn, 10, 2) != 0;
@@ -1104,6 +1102,12 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
case AArch64::STLRW:
case AArch64::STLRB:
case AArch64::STLRH:
+ case AArch64::STLLRW:
+ case AArch64::STLLRB:
+ case AArch64::STLLRH:
+ case AArch64::LDLARW:
+ case AArch64::LDLARB:
+ case AArch64::LDLARH:
DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
break;
case AArch64::STLXRX:
@@ -1114,6 +1118,8 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
case AArch64::LDAXRX:
case AArch64::LDXRX:
case AArch64::STLRX:
+ case AArch64::LDLARX:
+ case AArch64::STLLRX:
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
break;
case AArch64::STLXPW:
@@ -1262,7 +1268,7 @@ static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
}
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(offset));
+ Inst.addOperand(MCOperand::createImm(offset));
// You shouldn't load to the same register twice in an instruction...
if (IsLoad && Rt == Rt2)
@@ -1329,7 +1335,7 @@ static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst,
break;
}
- Inst.addOperand(MCOperand::CreateImm(extend));
+ Inst.addOperand(MCOperand::createImm(extend));
return Success;
}
@@ -1360,7 +1366,7 @@ static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst,
if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 32))
return Fail;
}
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return Success;
}
@@ -1377,7 +1383,7 @@ static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
else
DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
switch (Inst.getOpcode()) {
default:
@@ -1390,13 +1396,13 @@ static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
case AArch64::MOVIv4i32:
case AArch64::MVNIv2i32:
case AArch64::MVNIv4i32:
- Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2));
+ Inst.addOperand(MCOperand::createImm((cmode & 6) << 2));
break;
case AArch64::MOVIv2s_msl:
case AArch64::MOVIv4s_msl:
case AArch64::MVNIv2s_msl:
case AArch64::MVNIv4s_msl:
- Inst.addOperand(MCOperand::CreateImm(cmode & 1 ? 0x110 : 0x108));
+ Inst.addOperand(MCOperand::createImm(cmode & 1 ? 0x110 : 0x108));
break;
}
@@ -1415,8 +1421,8 @@ static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(imm));
- Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2));
+ Inst.addOperand(MCOperand::createImm(imm));
+ Inst.addOperand(MCOperand::createImm((cmode & 6) << 2));
return Success;
}
@@ -1435,7 +1441,7 @@ static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
if (!Dis->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4))
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return Success;
}
@@ -1471,8 +1477,8 @@ static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
}
if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
- Inst.addOperand(MCOperand::CreateImm(ImmVal));
- Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal));
+ Inst.addOperand(MCOperand::createImm(ImmVal));
+ Inst.addOperand(MCOperand::createImm(12 * ShifterVal));
return Success;
}
@@ -1488,7 +1494,7 @@ static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
imm |= ~((1LL << 26) - 1);
if (!Dis->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4))
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return Success;
}
@@ -1502,11 +1508,14 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
uint64_t pstate_field = (op1 << 3) | op2;
- Inst.addOperand(MCOperand::CreateImm(pstate_field));
- Inst.addOperand(MCOperand::CreateImm(crm));
+ Inst.addOperand(MCOperand::createImm(pstate_field));
+ Inst.addOperand(MCOperand::createImm(crm));
bool ValidNamed;
- (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed);
+ const AArch64Disassembler *Dis =
+ static_cast<const AArch64Disassembler *>(Decoder);
+ (void)AArch64PState::PStateMapper().toString(pstate_field,
+ Dis->getSubtargetInfo().getFeatureBits(), ValidNamed);
return ValidNamed ? Success : Fail;
}
@@ -1528,9 +1537,9 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
else
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(bit));
+ Inst.addOperand(MCOperand::createImm(bit));
if (!Dis->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4))
- Inst.addOperand(MCOperand::CreateImm(dst));
+ Inst.addOperand(MCOperand::createImm(dst));
return Success;
}
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 2057c51..07e4a45 100644
--- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -165,7 +165,7 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
if (SymbolicOp.AddSymbol.Present) {
if (SymbolicOp.AddSymbol.Name) {
StringRef Name(SymbolicOp.AddSymbol.Name);
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
if (Variant != MCSymbolRefExpr::VK_None)
Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx);
@@ -180,7 +180,7 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
if (SymbolicOp.SubtractSymbol.Present) {
if (SymbolicOp.SubtractSymbol.Name) {
StringRef Name(SymbolicOp.SubtractSymbol.Name);
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
Sub = MCSymbolRefExpr::Create(Sym, Ctx);
} else {
Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx);
@@ -214,7 +214,7 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
Expr = MCConstantExpr::Create(0, Ctx);
}
- MI.addOperand(MCOperand::CreateExpr(Expr));
+ MI.addOperand(MCOperand::createExpr(Expr));
return true;
}
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 46a1d79..02bd929 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -34,18 +34,13 @@ using namespace llvm;
AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : MCInstPrinter(MAI, MII, MRI) {
- // Initialize the set of available features.
- setAvailableFeatures(STI.getFeatureBits());
-}
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : AArch64InstPrinter(MAI, MII, MRI, STI) {}
+ const MCRegisterInfo &MRI)
+ : AArch64InstPrinter(MAI, MII, MRI) {}
void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
// This is for .cfi directives.
@@ -53,7 +48,8 @@ void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
// Check for special encodings and print the canonical alias instead.
unsigned Opcode = MI->getOpcode();
@@ -166,11 +162,23 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
int ImmR = MI->getOperand(3).getImm();
int ImmS = MI->getOperand(4).getImm();
- // BFI alias
- if (ImmS < ImmR) {
+ if ((Op2.getReg() == AArch64::WZR || Op2.getReg() == AArch64::XZR) &&
+ (ImmR == 0 || ImmS < ImmR)) {
+ // BFC takes precedence over its entire range, slightly differently to BFI.
+ int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32;
+ int LSB = (BitWidth - ImmR) % BitWidth;
+ int Width = ImmS + 1;
+
+ O << "\tbfc\t" << getRegisterName(Op0.getReg())
+ << ", #" << LSB << ", #" << Width;
+ printAnnotation(O, Annot);
+ return;
+ } else if (ImmS < ImmR) {
+ // BFI alias
int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32;
int LSB = (BitWidth - ImmR) % BitWidth;
int Width = ImmS + 1;
+
O << "\tbfi\t" << getRegisterName(Op0.getReg()) << ", "
<< getRegisterName(Op2.getReg()) << ", #" << LSB << ", #" << Width;
printAnnotation(O, Annot);
@@ -210,8 +218,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- if (!printAliasInstr(MI, O))
- printInstruction(MI, O);
+ if (!printAliasInstr(MI, STI, O))
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
}
@@ -614,7 +622,8 @@ static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
}
void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
unsigned Opcode = MI->getOpcode();
StringRef Layout, Mnemonic;
@@ -624,7 +633,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
<< getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", ";
unsigned ListOpNum = IsTbx ? 2 : 1;
- printVectorList(MI, ListOpNum, O, "");
+ printVectorList(MI, ListOpNum, STI, O, "");
O << ", "
<< getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg);
@@ -638,7 +647,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// Now onto the operands: first a vector list with possible lane
// specifier. E.g. { v0 }[2]
int OpNum = LdStDesc->ListOperand;
- printVectorList(MI, OpNum++, O, "");
+ printVectorList(MI, OpNum++, STI, O, "");
if (LdStDesc->HasLane)
O << '[' << MI->getOperand(OpNum++).getImm() << ']';
@@ -662,7 +671,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- AArch64InstPrinter::printInst(MI, O, Annot);
+ AArch64InstPrinter::printInst(MI, O, Annot, STI);
}
bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
@@ -889,6 +898,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
}
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
@@ -903,6 +913,7 @@ void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
O << format("#%#llx", Op.getImm());
@@ -922,6 +933,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isReg() && "Non-register vreg operand!");
@@ -930,6 +942,7 @@ void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isImm() && "System instruction C[nm] operands must be immediates!");
@@ -937,6 +950,7 @@ void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.isImm()) {
@@ -946,18 +960,19 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
O << '#' << Val;
if (Shift != 0)
- printShifter(MI, OpNum + 1, O);
+ printShifter(MI, OpNum + 1, STI, O);
if (CommentStream)
*CommentStream << '=' << (Val << Shift) << '\n';
} else {
assert(MO.isExpr() && "Unexpected operand type!");
O << *MO.getExpr();
- printShifter(MI, OpNum + 1, O);
+ printShifter(MI, OpNum + 1, STI, O);
}
}
void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
uint64_t Val = MI->getOperand(OpNum).getImm();
O << "#0x";
@@ -965,6 +980,7 @@ void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
uint64_t Val = MI->getOperand(OpNum).getImm();
O << "#0x";
@@ -972,6 +988,7 @@ void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNum).getImm();
// LSL #0 should not be printed.
@@ -983,18 +1000,21 @@ void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << getRegisterName(MI->getOperand(OpNum).getReg());
- printShifter(MI, OpNum + 1, O);
+ printShifter(MI, OpNum + 1, STI, O);
}
void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << getRegisterName(MI->getOperand(OpNum).getReg());
- printArithExtend(MI, OpNum + 1, O);
+ printArithExtend(MI, OpNum + 1, STI, O);
}
void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNum).getImm();
AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val);
@@ -1038,24 +1058,28 @@ void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
O << AArch64CC::getCondCodeName(CC);
}
void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC));
}
void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']';
}
template<int Scale>
void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << '#' << Scale * MI->getOperand(OpNum).getImm();
}
@@ -1085,10 +1109,12 @@ void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned prfop = MI->getOperand(OpNum).getImm();
bool Valid;
- StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, Valid);
+ StringRef Name =
+ AArch64PRFM::PRFMMapper().toString(prfop, STI.getFeatureBits(), Valid);
if (Valid)
O << Name;
else
@@ -1096,6 +1122,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
float FPImm =
@@ -1151,6 +1178,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
}
void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O,
StringRef LayoutSuffix) {
unsigned Reg = MI->getOperand(OpNum).getReg();
@@ -1193,14 +1221,17 @@ void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
O << " }";
}
-void AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
- printVectorList(MI, OpNum, O, "");
+void
+AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printVectorList(MI, OpNum, STI, O, "");
}
template <unsigned NumLanes, char LaneKind>
void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
std::string Suffix(".");
if (NumLanes)
@@ -1208,15 +1239,17 @@ void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
else
Suffix += LaneKind;
- printVectorList(MI, OpNum, O, Suffix);
+ printVectorList(MI, OpNum, STI, O, Suffix);
}
void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -1241,6 +1274,7 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -1256,6 +1290,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
unsigned Opcode = MI->getOpcode();
@@ -1263,9 +1298,11 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
bool Valid;
StringRef Name;
if (Opcode == AArch64::ISB)
- Name = AArch64ISB::ISBMapper().toString(Val, Valid);
+ Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(),
+ Valid);
else
- Name = AArch64DB::DBarrierMapper().toString(Val, Valid);
+ Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(),
+ Valid);
if (Valid)
O << Name;
else
@@ -1273,31 +1310,35 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures());
- std::string Name = Mapper.toString(Val);
+ auto Mapper = AArch64SysReg::MRSMapper();
+ std::string Name = Mapper.toString(Val, STI.getFeatureBits());
O << StringRef(Name).upper();
}
void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures());
- std::string Name = Mapper.toString(Val);
+ auto Mapper = AArch64SysReg::MSRMapper();
+ std::string Name = Mapper.toString(Val, STI.getFeatureBits());
O << StringRef(Name).upper();
}
void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
bool Valid;
- StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid);
+ StringRef Name =
+ AArch64PState::PStateMapper().toString(Val, STI.getFeatureBits(), Valid);
if (Valid)
O << StringRef(Name.str()).upper();
else
@@ -1305,6 +1346,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned RawVal = MI->getOperand(OpNo).getImm();
uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal);
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 5f51621..c2077a0 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -26,16 +26,21 @@ class MCOperand;
class AArch64InstPrinter : public MCInstPrinter {
public:
AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
- virtual void printInstruction(const MCInst *MI, raw_ostream &O);
- virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ virtual void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
virtual StringRef getRegName(unsigned RegNo) const {
return getRegisterName(RegNo);
}
@@ -45,90 +50,126 @@ public:
protected:
bool printSysAlias(const MCInst *MI, raw_ostream &O);
// Operand printers
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printHexImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printHexImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
raw_ostream &O);
- template<int Amount>
- void printPostIncOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ template <int Amount>
+ void printPostIncOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printPostIncOperand(MI, OpNo, Amount, O);
}
- void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printArithExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVRegOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSysCROperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddSubImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printLogicalImm32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printLogicalImm64(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printShifter(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printShiftedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExtendedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printArithExtend(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O,
char SrcRegKind, unsigned Width);
template <char SrcRegKind, unsigned Width>
- void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ void printMemExtend(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printMemExtend(MI, OpNum, O, SrcRegKind, Width);
}
- void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printInverseCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAlignedLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printInverseCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAlignedLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale,
raw_ostream &O);
void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale,
raw_ostream &O);
- template<int Scale>
- void printUImm12Offset(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ template <int Scale>
+ void printUImm12Offset(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printUImm12Offset(MI, OpNum, Scale, O);
}
- template<int BitWidth>
- void printAMIndexedWB(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ template <int BitWidth>
+ void printAMIndexedWB(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printAMIndexedWB(MI, OpNum, BitWidth / 8, O);
}
- void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAMNoIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- template<int Scale>
- void printImmScale(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ template <int Scale>
+ void printImmScale(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPrefetchOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O,
+ void printVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O,
StringRef LayoutSuffix);
/// Print a list of vector registers where the type suffix is implicit
/// (i.e. attached to the instruction rather than the registers).
void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
template <unsigned NumLanes, char LaneKind>
- void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSystemPStateField(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printTypedVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAdrpLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBarrierOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMSRSystemRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMRSSystemRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSystemPStateField(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSIMDType10Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
};
class AArch64AppleInstPrinter : public AArch64InstPrinter {
public:
AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
- void printInstruction(const MCInst *MI, raw_ostream &O) override;
- bool printAliasInstr(const MCInst *MI, raw_ostream &O) override;
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O) override;
+ bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O) override;
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI,
raw_ostream &O) override;
StringRef getRegName(unsigned RegNo) const override {
return getRegisterName(RegNo);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 4db9dea..ed24343 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -237,15 +237,15 @@ static inline bool processLogicalImmediate(uint64_t Imm, unsigned RegSize,
if (isShiftedMask_64(Imm)) {
I = countTrailingZeros(Imm);
assert(I < 64 && "undefined behavior");
- CTO = CountTrailingOnes_64(Imm >> I);
+ CTO = countTrailingOnes(Imm >> I);
} else {
Imm |= ~Mask;
if (!isShiftedMask_64(~Imm))
return false;
- unsigned CLO = CountLeadingOnes_64(Imm);
+ unsigned CLO = countLeadingOnes(Imm);
I = 64 - CLO;
- CTO = CLO + CountTrailingOnes_64(Imm) - (64 - Size);
+ CTO = CLO + countTrailingOnes(Imm) - (64 - Size);
}
// Encode in Immr the number of RORs it would take to get *from* 0^m 1^n
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 27cbac9..31fceb6 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
using namespace llvm;
@@ -246,10 +247,7 @@ bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// If the count is not 4-byte aligned, we must be writing data into the text
// section (otherwise we have unaligned instructions, and thus have far
// bigger problems), so just write zeros instead.
- if ((Count & 3) != 0) {
- for (uint64_t i = 0, e = (Count & 3); i != e; ++i)
- OW->Write8(0);
- }
+ OW->WriteZeros(Count % 4);
// We are properly aligned, so write NOPs as requested.
Count /= 4;
@@ -312,47 +310,11 @@ public:
DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI)
: AArch64AsmBackend(T), MRI(MRI) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64,
MachO::CPU_SUBTYPE_ARM64_ALL);
}
- bool doesSectionRequireSymbols(const MCSection &Section) const override {
- // Any section for which the linker breaks things into atoms needs to
- // preserve symbols, including assembler local symbols, to identify
- // those atoms. These sections are:
- // Sections of type:
- //
- // S_CSTRING_LITERALS (e.g. __cstring)
- // S_LITERAL_POINTERS (e.g. objc selector pointers)
- // S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
- //
- // Sections named:
- //
- // __TEXT,__eh_frame
- // __TEXT,__ustring
- // __DATA,__cfstring
- // __DATA,__objc_classrefs
- // __DATA,__objc_catlist
- //
- // FIXME: It would be better if the compiler used actual linker local
- // symbols for each of these sections rather than preserving what
- // are ostensibly assembler local symbols.
- const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
- return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
- SMO.getType() == MachO::S_4BYTE_LITERALS ||
- SMO.getType() == MachO::S_8BYTE_LITERALS ||
- SMO.getType() == MachO::S_16BYTE_LITERALS ||
- SMO.getType() == MachO::S_LITERAL_POINTERS ||
- (SMO.getSegmentName() == "__TEXT" &&
- (SMO.getSectionName() == "__eh_frame" ||
- SMO.getSectionName() == "__ustring")) ||
- (SMO.getSegmentName() == "__DATA" &&
- (SMO.getSectionName() == "__cfstring" ||
- SMO.getSectionName() == "__objc_classrefs" ||
- SMO.getSectionName() == "__objc_catlist")));
- }
-
/// \brief Generate the compact unwind encoding from the CFI directives.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {
@@ -496,7 +458,7 @@ public:
ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian)
: AArch64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian);
}
@@ -529,14 +491,28 @@ void ELFAArch64AsmBackend::processFixupValue(
IsResolved = false;
}
+// Returns whether this fixup is based on an address in the .eh_frame section,
+// and therefore should be byte swapped. Yields false when the expression
+// cannot be evaluated as relocatable, when it has no defined symbol A, or
+// when that symbol's section is not an ELF section.
+// FIXME: Should be replaced with something more principled.
+static bool isByteSwappedFixup(const MCExpr *E) {
+  MCValue Val;
+  if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr))
+    return false;
+
+  if (!Val.getSymA() || Val.getSymA()->getSymbol().isUndefined())
+    return false;
+
+  // dyn_cast returns null when the section is not an MCSectionELF, so the
+  // pointer must be checked before it is dereferenced.
+  const MCSectionELF *SecELF =
+      dyn_cast<MCSectionELF>(&Val.getSymA()->getSymbol().getSection());
+  return SecELF && SecELF->getSectionName() == ".eh_frame";
+}
+
void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
// store fixups in .eh_frame section in big endian order
if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) {
- const MCSection *Sec = Fixup.getValue()->FindAssociatedSection();
- const MCSectionELF *SecELF = dyn_cast_or_null<const MCSectionELF>(Sec);
- if (SecELF && SecELF->getSectionName() == ".eh_frame")
+ if (isByteSwappedFixup(Fixup.getValue()))
Value = ByteSwap_32(unsigned(Value));
}
AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 5ea49c3..1f516d1 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -26,7 +26,7 @@ class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
public:
AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian);
- virtual ~AArch64ELFObjectWriter();
+ ~AArch64ELFObjectWriter() override;
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
@@ -248,9 +248,9 @@ unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
llvm_unreachable("Unimplemented fixup -> relocation");
}
-MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI,
- bool IsLittleEndian) {
+MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI,
+ bool IsLittleEndian) {
MCELFObjectTargetWriter *MOTW =
new AArch64ELFObjectWriter(OSABI, IsLittleEndian);
return createELFObjectWriter(MOTW, OS, IsLittleEndian);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 8dc6c30..204a1ab 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64TargetStreamer.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
@@ -59,7 +60,7 @@ AArch64TargetAsmStreamer::AArch64TargetAsmStreamer(MCStreamer &S,
: AArch64TargetStreamer(S), OS(OS) {}
void AArch64TargetAsmStreamer::emitInst(uint32_t Inst) {
- OS << "\t.inst\t0x" << utohexstr(Inst) << "\n";
+ OS << "\t.inst\t0x" << Twine::utohexstr(Inst) << "\n";
}
class AArch64TargetELFStreamer : public AArch64TargetStreamer {
@@ -89,15 +90,12 @@ class AArch64ELFStreamer : public MCELFStreamer {
public:
friend class AArch64TargetELFStreamer;
- AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter)
+ AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter)
: MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0),
LastEMS(EMS_None) {}
- ~AArch64ELFStreamer() {}
-
- void ChangeSection(const MCSection *Section,
- const MCExpr *Subsection) override {
+ void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
// We have to keep track of the mapping symbol state of any sections we
// use. Each one should start off as EMS_None, which is provided as the
// default constructor by DenseMap::lookup.
@@ -117,15 +115,8 @@ public:
}
void emitInst(uint32_t Inst) {
- char Buffer[4];
- const bool LittleEndian = getContext().getAsmInfo()->isLittleEndian();
-
EmitA64MappingSymbol();
- for (unsigned II = 0; II != 4; ++II) {
- const unsigned I = LittleEndian ? (4 - II - 1) : II;
- Buffer[4 - II - 1] = uint8_t(Inst >> I * CHAR_BIT);
- }
- MCELFStreamer::EmitBytes(StringRef(Buffer, 4));
+ MCELFStreamer::EmitIntValue(Inst, 4);
}
/// This is one of the functions used to emit data into an ELF section, so the
@@ -167,10 +158,10 @@ private:
}
void EmitMappingSymbol(StringRef Name) {
- MCSymbol *Start = getContext().CreateTempSymbol();
+ MCSymbol *Start = getContext().createTempSymbol();
EmitLabel(Start);
- MCSymbol *Symbol = getContext().GetOrCreateSymbol(
+ MCSymbol *Symbol = getContext().getOrCreateSymbol(
Name + "." + Twine(MappingSymbolCounter++));
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
@@ -189,8 +180,6 @@ private:
DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
ElfMappingSymbol LastEMS;
-
- /// @}
};
} // end anonymous namespace
@@ -203,24 +192,27 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
}
namespace llvm {
-MCStreamer *
-createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new AArch64TargetAsmStreamer(*S, OS);
- return S;
+MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new AArch64TargetAsmStreamer(S, OS);
}
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll) {
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
- new AArch64TargetELFStreamer(*S);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
}
+
+/// Creates the target streamer used for object emission: an
+/// AArch64TargetELFStreamer when the subtarget's triple selects the ELF
+/// object format, and no target streamer (nullptr) for any other format.
+MCTargetStreamer *
+createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+  const Triple TargetTriple(STI.getTargetTriple());
+  const bool IsELF = TargetTriple.getObjectFormat() == Triple::ELF;
+  return IsELF ? new AArch64TargetELFStreamer(S) : nullptr;
+}
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
index 71b05cc..ef48203 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -19,8 +19,8 @@
namespace llvm {
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
}
#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index f048474..ab2cad6 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -48,6 +48,10 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
UseDataRegionDirectives = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ // AArch64 Darwin doesn't have the baggage of X86/ARM, so it's fine to use
+ // LShr instead of AShr.
+ UseLogicalShr = true;
}
const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
@@ -59,7 +63,7 @@ const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
MCContext &Context = Streamer.getContext();
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context);
- MCSymbol *PCSym = Context.CreateTempSymbol();
+ MCSymbol *PCSym = Context.createTempSymbol();
Streamer.EmitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
return MCBinaryExpr::CreateSub(Res, PC, Context);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 5d03c21..9b88de7 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class Target;
@@ -27,7 +28,7 @@ struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin {
MCStreamer &Streamer) const override;
};
-struct AArch64MCAsmInfoELF : public MCAsmInfo {
+struct AArch64MCAsmInfoELF : public MCAsmInfoELF {
explicit AArch64MCAsmInfoELF(StringRef TT);
};
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 4756a192..277ea9f 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -38,11 +38,9 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
public:
- AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx)
- : Ctx(ctx) {}
+ AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {}
- ~AArch64MCCodeEmitter() {}
+ ~AArch64MCCodeEmitter() override {}
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
@@ -186,7 +184,7 @@ public:
}
}
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -205,9 +203,8 @@ public:
MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new AArch64MCCodeEmitter(MCII, STI, Ctx);
+ return new AArch64MCCodeEmitter(MCII, Ctx);
}
/// getMachineOpValue - Return binary encoding of operand. If the machine
@@ -235,7 +232,7 @@ AArch64MCCodeEmitter::getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx,
else {
assert(MO.isExpr() && "unable to encode load/store imm operand");
MCFixupKind Kind = MCFixupKind(FixupKind);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
}
@@ -259,7 +256,7 @@ AArch64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
MCFixupKind Kind = MI.getOpcode() == AArch64::ADR
? MCFixupKind(AArch64::fixup_aarch64_pcrel_adr_imm21)
: MCFixupKind(AArch64::fixup_aarch64_pcrel_adrp_imm21);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
MCNumFixups += 1;
@@ -289,7 +286,7 @@ AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
// Encode the 12 bits of the fixup.
MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_add_imm12);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
++MCNumFixups;
@@ -309,7 +306,7 @@ uint32_t AArch64MCCodeEmitter::getCondBranchTargetOpValue(
assert(MO.isExpr() && "Unexpected target type!");
MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch19);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
@@ -331,7 +328,7 @@ AArch64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx,
assert(MO.isExpr() && "Unexpected target type!");
MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_ldr_pcrel_imm19);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
@@ -358,7 +355,7 @@ AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
return MO.getImm();
assert(MO.isExpr() && "Unexpected movz/movk immediate");
- Fixups.push_back(MCFixup::Create(
+ Fixups.push_back(MCFixup::create(
0, MO.getExpr(), MCFixupKind(AArch64::fixup_aarch64_movw), MI.getLoc()));
++MCNumFixups;
@@ -379,7 +376,7 @@ uint32_t AArch64MCCodeEmitter::getTestBranchTargetOpValue(
assert(MO.isExpr() && "Unexpected ADR target type!");
MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch14);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
@@ -403,7 +400,7 @@ AArch64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
MCFixupKind Kind = MI.getOpcode() == AArch64::BL
? MCFixupKind(AArch64::fixup_aarch64_pcrel_call26)
: MCFixupKind(AArch64::fixup_aarch64_pcrel_branch26);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
@@ -601,7 +598,7 @@ unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue,
return EncodedValue & ~(1u << 30);
}
-void AArch64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
if (MI.getOpcode() == AArch64::TLSDESCCALL) {
@@ -609,7 +606,7 @@ void AArch64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// following (BLR) instruction. It doesn't emit any code itself so it
// doesn't go through the normal TableGenerated channels.
MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call);
- Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup));
+ Fixups.push_back(MCFixup::create(0, MI.getOperand(0).getExpr(), Fixup));
return;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index e396df8..74b81af 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/ELF.h"
@@ -85,7 +86,7 @@ void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const {
Streamer.visitUsedExpr(*getSubExpr());
}
-const MCSection *AArch64MCExpr::FindAssociatedSection() const {
+MCSection *AArch64MCExpr::FindAssociatedSection() const {
llvm_unreachable("FIXME: what goes here?");
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index db48ac9..95d2277 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -120,7 +120,7 @@ public:
/// @{
/// Get the kind of this expression.
- VariantKind getKind() const { return static_cast<VariantKind>(Kind); }
+ VariantKind getKind() const { return Kind; }
/// Get the expression this modifier applies to.
const MCExpr *getSubExpr() const { return Expr; }
@@ -149,7 +149,7 @@ public:
void visitUsedExpr(MCStreamer &Streamer) const override;
- const MCSection *FindAssociatedSection() const override;
+ MCSection *FindAssociatedSection() const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 0f7a6b8..2e22de0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -105,112 +105,78 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
RM = Reloc::Static;
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
+static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new AArch64InstPrinter(MAI, MII, MRI, STI);
+ return new AArch64InstPrinter(MAI, MII, MRI);
if (SyntaxVariant == 1)
- return new AArch64AppleInstPrinter(MAI, MII, MRI, STI);
+ return new AArch64AppleInstPrinter(MAI, MII, MRI);
return nullptr;
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
- /*LabelSections*/ true);
-
+static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
+ MCAsmBackend &TAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
}
+/// Builds the Mach-O object streamer for AArch64 by forwarding every argument
+/// to the seven-argument createMachOStreamer overload, always requesting
+/// LabelSections.
+static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
+                                       raw_pwrite_stream &OS,
+                                       MCCodeEmitter *Emitter, bool RelaxAll,
+                                       bool DWARFMustBeAtTheEnd) {
+  const bool LabelSections = true;
+  return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
+                             DWARFMustBeAtTheEnd, LabelSections);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeAArch64TargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn Z(TheARM64Target, createAArch64MCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget,
- createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget,
- createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target,
- createAArch64MCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget,
- createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget,
- createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARM64Target,
- createAArch64MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget,
- createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget,
- createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARM64Target,
- createAArch64MCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget,
- createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget,
- createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target,
- createAArch64MCSubtargetInfo);
+ for (Target *T :
+ {&TheAArch64leTarget, &TheAArch64beTarget, &TheARM64Target}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createAArch64MCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAArch64MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createAArch64MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createAArch64MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createAArch64MCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(*T, createAArch64MCCodeEmitter);
+
+ // Register the obj streamers.
+ TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
+ TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer);
+
+ // Register the obj target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(
+ *T, createAArch64ObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T,
+ createAArch64AsmTargetStreamer);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createAArch64MCInstPrinter);
+ }
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget,
- createAArch64leAsmBackend);
+ for (Target *T : {&TheAArch64leTarget, &TheARM64Target})
+ TargetRegistry::RegisterMCAsmBackend(*T, createAArch64leAsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget,
createAArch64beAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheARM64Target,
- createAArch64leAsmBackend);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget,
- createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget,
- createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARM64Target,
- createAArch64MCCodeEmitter);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheAArch64leTarget,
- createAArch64MCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheAArch64beTarget,
- createAArch64MCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheARM64Target,
- createAArch64MCAsmStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget,
- createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget,
- createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARM64Target,
- createAArch64MCInstPrinter);
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 1553115..4705bdf 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -28,18 +28,20 @@ class MCRegisterInfo;
class MCObjectWriter;
class MCStreamer;
class MCSubtargetInfo;
+class MCTargetStreamer;
class StringRef;
class Target;
+class Triple;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheAArch64leTarget;
extern Target TheAArch64beTarget;
extern Target TheARM64Target;
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
MCAsmBackend *createAArch64leAsmBackend(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU);
@@ -47,17 +49,22 @@ MCAsmBackend *createAArch64beAsmBackend(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU);
-MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
+MCObjectWriter *createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI,
bool IsLittleEndian);
-MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
- uint32_t CPUSubtype);
+MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
+MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm);
+
+MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI);
-MCStreamer *
-createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst);
} // End llvm namespace
// Defines symbolic names for AArch64 registers. This defines a mapping from
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index e12a24b..d425975 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -33,7 +34,7 @@ public:
: MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/true) {}
- void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
@@ -91,7 +92,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
// This encompasses the relocation for the whole 21-bit value.
switch (Sym->getKind()) {
default:
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"ADR/ADRP relocations must be GOT relative");
case MCSymbolRefExpr::VK_PAGE:
RelocType = unsigned(MachO::ARM64_RELOC_PAGE21);
@@ -112,8 +113,35 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
}
}
+/// Decides whether a relocation placed in \p Section that refers to
+/// \p Symbol may be emitted as a local relocation. \p Log2Size is compared
+/// against 3, i.e. the pointer-sized case.
+static bool canUseLocalRelocation(const MCSectionMachO &Section,
+                                  const MCSymbol &Symbol, unsigned Log2Size) {
+  // Relocations inside debug info sections may always be local.
+  const bool InDebugSection = Section.hasAttribute(MachO::S_ATTR_DEBUG);
+  if (InDebugSection)
+    return true;
+
+  // Everywhere else, only pointer sized relocations are supported.
+  const bool IsPointerSized = Log2Size == 3;
+  if (!IsPointerSized)
+    return false;
+
+  // A symbol that is not in a section cannot point into a forbidden one.
+  if (!Symbol.isInSection())
+    return true;
+
+  // A few sections must never be referenced through a local relocation.
+  const MCSectionMachO &Referenced =
+      cast<MCSectionMachO>(Symbol.getSection());
+  const bool IsCStringLiterals =
+      Referenced.getType() == MachO::S_CSTRING_LITERALS;
+  const bool IsObjCClassRefs =
+      Referenced.getSegmentName() == "__DATA" &&
+      Referenced.getSectionName() == "__objc_classrefs";
+  if (IsCStringLiterals || IsObjCClassRefs)
+    return false;
+
+  // FIXME: ld64 currently handles internal pointer-sized relocations
+  // incorrectly (applying the addend twice). Once that is fixed, this point
+  // should be able to return true unconditionally.
+  return false;
+}
+
void AArch64MachObjectWriter::RecordRelocation(
- MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
@@ -123,9 +151,9 @@ void AArch64MachObjectWriter::RecordRelocation(
unsigned Log2Size = 0;
int64_t Value = 0;
unsigned Index = 0;
- unsigned IsExtern = 0;
unsigned Type = 0;
unsigned Kind = Fixup.getKind();
+ const MCSymbol *RelSymbol = nullptr;
FixupOffset += Fixup.getOffset();
@@ -143,7 +171,7 @@ void AArch64MachObjectWriter::RecordRelocation(
// assembler local symbols. If we got here, that's not what we have,
// so complain loudly.
if (Kind == AArch64::fixup_aarch64_pcrel_branch19) {
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"conditional branch requires assembler-local"
" label. '" +
Target.getSymA()->getSymbol().getName() +
@@ -154,14 +182,14 @@ void AArch64MachObjectWriter::RecordRelocation(
// 14-bit branch relocations should only target internal labels, and so
// should never get here.
if (Kind == AArch64::fixup_aarch64_pcrel_branch14) {
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"Invalid relocation on conditional branch!");
return;
}
if (!getAArch64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size,
Asm)) {
- Asm.getContext().FatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!");
+ Asm.getContext().reportFatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!");
return;
}
@@ -171,11 +199,9 @@ void AArch64MachObjectWriter::RecordRelocation(
// FIXME: Should this always be extern?
// SymbolNum of 0 indicates the absolute section.
Type = MachO::ARM64_RELOC_UNSIGNED;
- Index = 0;
if (IsPCRel) {
- IsExtern = 1;
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"PC relative absolute relocation!");
// FIXME: x86_64 sets the type to a branch reloc here. Should we do
@@ -184,39 +210,36 @@ void AArch64MachObjectWriter::RecordRelocation(
} else if (Target.getSymB()) { // A - B + constant
const MCSymbol *A = &Target.getSymA()->getSymbol();
const MCSymbolData &A_SD = Asm.getSymbolData(*A);
- const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
+ const MCSymbol *A_Base = Asm.getAtom(*A);
const MCSymbol *B = &Target.getSymB()->getSymbol();
const MCSymbolData &B_SD = Asm.getSymbolData(*B);
- const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
+ const MCSymbol *B_Base = Asm.getAtom(*B);
// Check for "_foo@got - .", which comes through here as:
// Ltmp0:
// ... _foo@got - Ltmp0
if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOT &&
Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None &&
- Layout.getSymbolOffset(&B_SD) ==
+ Layout.getSymbolOffset(*B) ==
Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) {
// SymB is the PC, so use a PC-rel pointer-to-GOT relocation.
- Index = A_Base->getIndex();
- IsExtern = 1;
Type = MachO::ARM64_RELOC_POINTER_TO_GOT;
IsPCRel = 1;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
return;
} else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
// Otherwise, neither symbol can be modified.
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"unsupported relocation of modified symbol");
// We don't support PCrel relocations of differences.
if (IsPCRel)
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"unsupported pc-relative relocation of "
"difference");
@@ -227,50 +250,52 @@ void AArch64MachObjectWriter::RecordRelocation(
// FIXME: We should probably just synthesize an external symbol and use
// that.
if (!A_Base)
- Asm.getContext().FatalError(
+ Asm.getContext().reportFatalError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + A->getName() +
"'. Must have non-local symbol earlier in section.");
if (!B_Base)
- Asm.getContext().FatalError(
+ Asm.getContext().reportFatalError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + B->getName() +
"'. Must have non-local symbol earlier in section.");
if (A_Base == B_Base && A_Base)
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"unsupported relocation with identical base");
- Value += (!A_SD.getFragment() ? 0
- : Writer->getSymbolAddress(&A_SD, Layout)) -
- (!A_Base || !A_Base->getFragment()
+ Value += (!A_SD.getFragment() ? 0 : Writer->getSymbolAddress(*A, Layout)) -
+ (!A_Base || !A_Base->getData().getFragment()
? 0
- : Writer->getSymbolAddress(A_Base, Layout));
- Value -= (!B_SD.getFragment() ? 0
- : Writer->getSymbolAddress(&B_SD, Layout)) -
- (!B_Base || !B_Base->getFragment()
+ : Writer->getSymbolAddress(*A_Base, Layout));
+ Value -= (!B_SD.getFragment() ? 0 : Writer->getSymbolAddress(*B, Layout)) -
+ (!B_Base || !B_Base->getData().getFragment()
? 0
- : Writer->getSymbolAddress(B_Base, Layout));
+ : Writer->getSymbolAddress(*B_Base, Layout));
- Index = A_Base->getIndex();
- IsExtern = 1;
Type = MachO::ARM64_RELOC_UNSIGNED;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
- Index = B_Base->getIndex();
- IsExtern = 1;
+ RelSymbol = B_Base;
Type = MachO::ARM64_RELOC_SUBTRACTOR;
} else { // A + constant
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
- const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
- const MCSymbolData *Base = Asm.getAtom(&SD);
- const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
- Fragment->getParent()->getSection());
+ const MCSectionMachO &Section =
+ static_cast<const MCSectionMachO &>(*Fragment->getParent());
+
+ bool CanUseLocalRelocation =
+ canUseLocalRelocation(Section, *Symbol, Log2Size);
+ if (Symbol->isTemporary() && (Value || !CanUseLocalRelocation)) {
+ const MCSection &Sec = Symbol->getSection();
+ if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
+ Asm.addLocalUsedInReloc(*Symbol);
+ }
+
+ const MCSymbol *Base = Asm.getAtom(*Symbol);
// If the symbol is a variable and we weren't able to get a Base for it
// (i.e., it's not in the symbol table associated with a section) resolve
@@ -279,7 +304,7 @@ void AArch64MachObjectWriter::RecordRelocation(
// If the evaluation is an absolute value, just use that directly
// to keep things easy.
int64_t Res;
- if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ if (Symbol->getVariableValue()->EvaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
return;
@@ -290,7 +315,7 @@ void AArch64MachObjectWriter::RecordRelocation(
// the FixedValue?
if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout,
&Fixup))
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"unable to resolve variable '" +
Symbol->getName() + "'");
return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
@@ -310,42 +335,38 @@ void AArch64MachObjectWriter::RecordRelocation(
// sections, and for pointer-sized relocations (.quad), we allow section
// relocations. It's code sections that run into trouble.
if (Base) {
- Index = Base->getIndex();
- IsExtern = 1;
+ RelSymbol = Base;
// Add the local offset, if needed.
- if (Base != &SD)
- Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
+ if (Base != Symbol)
+ Value +=
+ Layout.getSymbolOffset(*Symbol) - Layout.getSymbolOffset(*Base);
} else if (Symbol->isInSection()) {
- // Pointer-sized relocations can use a local relocation. Otherwise,
- // we have to be in a debug info section.
- if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3)
- Asm.getContext().FatalError(
+ if (!CanUseLocalRelocation)
+ Asm.getContext().reportFatalError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + Symbol->getName() +
"'. Must have non-local symbol earlier in section.");
// Adjust the relocation to be section-relative.
// The index is the section ordinal (1-based).
- const MCSectionData &SymSD =
- Asm.getSectionData(SD.getSymbol().getSection());
- Index = SymSD.getOrdinal() + 1;
- IsExtern = 0;
- Value += Writer->getSymbolAddress(&SD, Layout);
+ const MCSection &Sec = Symbol->getSection();
+ Index = Sec.getOrdinal() + 1;
+ Value += Writer->getSymbolAddress(*Symbol, Layout);
if (IsPCRel)
Value -= Writer->getFragmentAddress(Fragment, Layout) +
Fixup.getOffset() + (1ULL << Log2Size);
} else {
// Resolve constant variables.
- if (SD.getSymbol().isVariable()) {
+ if (Symbol->isVariable()) {
int64_t Res;
- if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ if (Symbol->getVariableValue()->EvaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
return;
}
}
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"unsupported relocation of variable '" +
Symbol->getName() + "'");
}
@@ -362,16 +383,16 @@ void AArch64MachObjectWriter::RecordRelocation(
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
// Now set up the Addend relocation.
Type = MachO::ARM64_RELOC_ADDEND;
Index = Value;
+ RelSymbol = nullptr;
IsPCRel = 0;
Log2Size = 2;
- IsExtern = 0;
// Put zero into the instruction itself. The addend is in the relocation.
Value = 0;
@@ -383,14 +404,14 @@ void AArch64MachObjectWriter::RecordRelocation(
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS,
- uint32_t CPUType,
- uint32_t CPUSubtype) {
+MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_pwrite_stream &OS,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
return createMachObjectWriter(
new AArch64MachObjectWriter(CPUType, CPUSubtype), OS,
/*IsLittleEndian=*/true);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index e3112fa..52b000d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -1,4 +1,4 @@
-//===- AArch64TargetStreamer.cpp - AArch64TargetStreamer class --*- C++ -*---------===//
+//===- AArch64TargetStreamer.cpp - AArch64TargetStreamer class ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,12 +10,9 @@
// This file implements the AArch64TargetStreamer class.
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/MapVector.h"
-#include "llvm/MC/ConstantPools.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCStreamer.h"
+#include "AArch64TargetStreamer.h"
+#include "llvm/MC/ConstantPools.h"
using namespace llvm;
//
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
new file mode 100644
index 0000000..fcc0d05
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -0,0 +1,42 @@
+//===-- AArch64TargetStreamer.h - AArch64 Target Streamer ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64TARGETSTREAMER_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64TARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class AArch64TargetStreamer : public MCTargetStreamer {
+public:
+ AArch64TargetStreamer(MCStreamer &S);
+ ~AArch64TargetStreamer() override;
+
+ void finish() override;
+
+ /// Callback used to implement the ldr= pseudo.
+ /// Add a new entry to the constant pool for the current section and return an
+ /// MCExpr that can be used to refer to the constant pool location.
+ const MCExpr *addConstantPoolEntry(const MCExpr *, unsigned Size);
+
+ /// Callback used to implement the .ltorg directive.
+ /// Emit contents of constant pool for the current section.
+ void emitCurrentConstantPool();
+
+ /// Callback used to implement the .inst directive.
+ virtual void emitInst(uint32_t Inst);
+
+private:
+ std::unique_ptr<AssemblerConstantPools> ConstantPools;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index bc6c7a9..28b8e7e 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -18,11 +18,12 @@
using namespace llvm;
-StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
- for (unsigned i = 0; i < NumPairs; ++i) {
- if (Pairs[i].Value == Value) {
+StringRef AArch64NamedImmMapper::toString(uint32_t Value,
+ const FeatureBitset& FeatureBits, bool &Valid) const {
+ for (unsigned i = 0; i < NumMappings; ++i) {
+ if (Mappings[i].isValueEqual(Value, FeatureBits)) {
Valid = true;
- return Pairs[i].Name;
+ return Mappings[i].Name;
}
}
@@ -30,12 +31,13 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
return StringRef();
}
-uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
+uint32_t AArch64NamedImmMapper::fromString(StringRef Name,
+ const FeatureBitset& FeatureBits, bool &Valid) const {
std::string LowerCaseName = Name.lower();
- for (unsigned i = 0; i < NumPairs; ++i) {
- if (Pairs[i].Name == LowerCaseName) {
+ for (unsigned i = 0; i < NumMappings; ++i) {
+ if (Mappings[i].isNameEqual(LowerCaseName, FeatureBits)) {
Valid = true;
- return Pairs[i].Value;
+ return Mappings[i].Value;
}
}
@@ -47,747 +49,781 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const {
return Value < TooBigImm;
}
-const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = {
- {"s1e1r", S1E1R},
- {"s1e2r", S1E2R},
- {"s1e3r", S1E3R},
- {"s1e1w", S1E1W},
- {"s1e2w", S1E2W},
- {"s1e3w", S1E3W},
- {"s1e0r", S1E0R},
- {"s1e0w", S1E0W},
- {"s12e1r", S12E1R},
- {"s12e1w", S12E1W},
- {"s12e0r", S12E0R},
- {"s12e0w", S12E0W},
+const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = {
+ {"s1e1r", S1E1R, {}},
+ {"s1e2r", S1E2R, {}},
+ {"s1e3r", S1E3R, {}},
+ {"s1e1w", S1E1W, {}},
+ {"s1e2w", S1E2W, {}},
+ {"s1e3w", S1E3W, {}},
+ {"s1e0r", S1E0R, {}},
+ {"s1e0w", S1E0W, {}},
+ {"s12e1r", S12E1R, {}},
+ {"s12e1w", S12E1W, {}},
+ {"s12e0r", S12E0R, {}},
+ {"s12e0w", S12E0W, {}},
};
AArch64AT::ATMapper::ATMapper()
- : AArch64NamedImmMapper(ATPairs, 0) {}
-
-const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = {
- {"oshld", OSHLD},
- {"oshst", OSHST},
- {"osh", OSH},
- {"nshld", NSHLD},
- {"nshst", NSHST},
- {"nsh", NSH},
- {"ishld", ISHLD},
- {"ishst", ISHST},
- {"ish", ISH},
- {"ld", LD},
- {"st", ST},
- {"sy", SY}
+ : AArch64NamedImmMapper(ATMappings, 0) {}
+
+const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = {
+ {"oshld", OSHLD, {}},
+ {"oshst", OSHST, {}},
+ {"osh", OSH, {}},
+ {"nshld", NSHLD, {}},
+ {"nshst", NSHST, {}},
+ {"nsh", NSH, {}},
+ {"ishld", ISHLD, {}},
+ {"ishst", ISHST, {}},
+ {"ish", ISH, {}},
+ {"ld", LD, {}},
+ {"st", ST, {}},
+ {"sy", SY, {}}
};
AArch64DB::DBarrierMapper::DBarrierMapper()
- : AArch64NamedImmMapper(DBarrierPairs, 16u) {}
-
-const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = {
- {"zva", ZVA},
- {"ivac", IVAC},
- {"isw", ISW},
- {"cvac", CVAC},
- {"csw", CSW},
- {"cvau", CVAU},
- {"civac", CIVAC},
- {"cisw", CISW}
+ : AArch64NamedImmMapper(DBarrierMappings, 16u) {}
+
+const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = {
+ {"zva", ZVA, {}},
+ {"ivac", IVAC, {}},
+ {"isw", ISW, {}},
+ {"cvac", CVAC, {}},
+ {"csw", CSW, {}},
+ {"cvau", CVAU, {}},
+ {"civac", CIVAC, {}},
+ {"cisw", CISW, {}}
};
AArch64DC::DCMapper::DCMapper()
- : AArch64NamedImmMapper(DCPairs, 0) {}
+ : AArch64NamedImmMapper(DCMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = {
- {"ialluis", IALLUIS},
- {"iallu", IALLU},
- {"ivau", IVAU}
+const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = {
+ {"ialluis", IALLUIS, {}},
+ {"iallu", IALLU, {}},
+ {"ivau", IVAU, {}}
};
AArch64IC::ICMapper::ICMapper()
- : AArch64NamedImmMapper(ICPairs, 0) {}
+ : AArch64NamedImmMapper(ICMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = {
- {"sy", SY},
+const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = {
+ {"sy", SY, {}},
};
AArch64ISB::ISBMapper::ISBMapper()
- : AArch64NamedImmMapper(ISBPairs, 16) {}
-
-const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = {
- {"pldl1keep", PLDL1KEEP},
- {"pldl1strm", PLDL1STRM},
- {"pldl2keep", PLDL2KEEP},
- {"pldl2strm", PLDL2STRM},
- {"pldl3keep", PLDL3KEEP},
- {"pldl3strm", PLDL3STRM},
- {"plil1keep", PLIL1KEEP},
- {"plil1strm", PLIL1STRM},
- {"plil2keep", PLIL2KEEP},
- {"plil2strm", PLIL2STRM},
- {"plil3keep", PLIL3KEEP},
- {"plil3strm", PLIL3STRM},
- {"pstl1keep", PSTL1KEEP},
- {"pstl1strm", PSTL1STRM},
- {"pstl2keep", PSTL2KEEP},
- {"pstl2strm", PSTL2STRM},
- {"pstl3keep", PSTL3KEEP},
- {"pstl3strm", PSTL3STRM}
+ : AArch64NamedImmMapper(ISBMappings, 16) {}
+
+const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = {
+ {"pldl1keep", PLDL1KEEP, {}},
+ {"pldl1strm", PLDL1STRM, {}},
+ {"pldl2keep", PLDL2KEEP, {}},
+ {"pldl2strm", PLDL2STRM, {}},
+ {"pldl3keep", PLDL3KEEP, {}},
+ {"pldl3strm", PLDL3STRM, {}},
+ {"plil1keep", PLIL1KEEP, {}},
+ {"plil1strm", PLIL1STRM, {}},
+ {"plil2keep", PLIL2KEEP, {}},
+ {"plil2strm", PLIL2STRM, {}},
+ {"plil3keep", PLIL3KEEP, {}},
+ {"plil3strm", PLIL3STRM, {}},
+ {"pstl1keep", PSTL1KEEP, {}},
+ {"pstl1strm", PSTL1STRM, {}},
+ {"pstl2keep", PSTL2KEEP, {}},
+ {"pstl2strm", PSTL2STRM, {}},
+ {"pstl3keep", PSTL3KEEP, {}},
+ {"pstl3strm", PSTL3STRM, {}}
};
AArch64PRFM::PRFMMapper::PRFMMapper()
- : AArch64NamedImmMapper(PRFMPairs, 32) {}
+ : AArch64NamedImmMapper(PRFMMappings, 32) {}
-const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = {
- {"spsel", SPSel},
- {"daifset", DAIFSet},
- {"daifclr", DAIFClr}
+const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = {
+ {"spsel", SPSel, {}},
+ {"daifset", DAIFSet, {}},
+ {"daifclr", DAIFClr, {}},
+
+ // v8.1a "Privileged Access Never" extension-specific PStates
+ {"pan", PAN, {AArch64::HasV8_1aOps}},
};
AArch64PState::PStateMapper::PStateMapper()
- : AArch64NamedImmMapper(PStatePairs, 0) {}
-
-const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = {
- {"mdccsr_el0", MDCCSR_EL0},
- {"dbgdtrrx_el0", DBGDTRRX_EL0},
- {"mdrar_el1", MDRAR_EL1},
- {"oslsr_el1", OSLSR_EL1},
- {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1},
- {"pmceid0_el0", PMCEID0_EL0},
- {"pmceid1_el0", PMCEID1_EL0},
- {"midr_el1", MIDR_EL1},
- {"ccsidr_el1", CCSIDR_EL1},
- {"clidr_el1", CLIDR_EL1},
- {"ctr_el0", CTR_EL0},
- {"mpidr_el1", MPIDR_EL1},
- {"revidr_el1", REVIDR_EL1},
- {"aidr_el1", AIDR_EL1},
- {"dczid_el0", DCZID_EL0},
- {"id_pfr0_el1", ID_PFR0_EL1},
- {"id_pfr1_el1", ID_PFR1_EL1},
- {"id_dfr0_el1", ID_DFR0_EL1},
- {"id_afr0_el1", ID_AFR0_EL1},
- {"id_mmfr0_el1", ID_MMFR0_EL1},
- {"id_mmfr1_el1", ID_MMFR1_EL1},
- {"id_mmfr2_el1", ID_MMFR2_EL1},
- {"id_mmfr3_el1", ID_MMFR3_EL1},
- {"id_isar0_el1", ID_ISAR0_EL1},
- {"id_isar1_el1", ID_ISAR1_EL1},
- {"id_isar2_el1", ID_ISAR2_EL1},
- {"id_isar3_el1", ID_ISAR3_EL1},
- {"id_isar4_el1", ID_ISAR4_EL1},
- {"id_isar5_el1", ID_ISAR5_EL1},
- {"id_aa64pfr0_el1", ID_A64PFR0_EL1},
- {"id_aa64pfr1_el1", ID_A64PFR1_EL1},
- {"id_aa64dfr0_el1", ID_A64DFR0_EL1},
- {"id_aa64dfr1_el1", ID_A64DFR1_EL1},
- {"id_aa64afr0_el1", ID_A64AFR0_EL1},
- {"id_aa64afr1_el1", ID_A64AFR1_EL1},
- {"id_aa64isar0_el1", ID_A64ISAR0_EL1},
- {"id_aa64isar1_el1", ID_A64ISAR1_EL1},
- {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1},
- {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1},
- {"mvfr0_el1", MVFR0_EL1},
- {"mvfr1_el1", MVFR1_EL1},
- {"mvfr2_el1", MVFR2_EL1},
- {"rvbar_el1", RVBAR_EL1},
- {"rvbar_el2", RVBAR_EL2},
- {"rvbar_el3", RVBAR_EL3},
- {"isr_el1", ISR_EL1},
- {"cntpct_el0", CNTPCT_EL0},
- {"cntvct_el0", CNTVCT_EL0},
+ : AArch64NamedImmMapper(PStateMappings, 0) {}
+
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = {
+ {"mdccsr_el0", MDCCSR_EL0, {}},
+ {"dbgdtrrx_el0", DBGDTRRX_EL0, {}},
+ {"mdrar_el1", MDRAR_EL1, {}},
+ {"oslsr_el1", OSLSR_EL1, {}},
+ {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1, {}},
+ {"pmceid0_el0", PMCEID0_EL0, {}},
+ {"pmceid1_el0", PMCEID1_EL0, {}},
+ {"midr_el1", MIDR_EL1, {}},
+ {"ccsidr_el1", CCSIDR_EL1, {}},
+ {"clidr_el1", CLIDR_EL1, {}},
+ {"ctr_el0", CTR_EL0, {}},
+ {"mpidr_el1", MPIDR_EL1, {}},
+ {"revidr_el1", REVIDR_EL1, {}},
+ {"aidr_el1", AIDR_EL1, {}},
+ {"dczid_el0", DCZID_EL0, {}},
+ {"id_pfr0_el1", ID_PFR0_EL1, {}},
+ {"id_pfr1_el1", ID_PFR1_EL1, {}},
+ {"id_dfr0_el1", ID_DFR0_EL1, {}},
+ {"id_afr0_el1", ID_AFR0_EL1, {}},
+ {"id_mmfr0_el1", ID_MMFR0_EL1, {}},
+ {"id_mmfr1_el1", ID_MMFR1_EL1, {}},
+ {"id_mmfr2_el1", ID_MMFR2_EL1, {}},
+ {"id_mmfr3_el1", ID_MMFR3_EL1, {}},
+ {"id_isar0_el1", ID_ISAR0_EL1, {}},
+ {"id_isar1_el1", ID_ISAR1_EL1, {}},
+ {"id_isar2_el1", ID_ISAR2_EL1, {}},
+ {"id_isar3_el1", ID_ISAR3_EL1, {}},
+ {"id_isar4_el1", ID_ISAR4_EL1, {}},
+ {"id_isar5_el1", ID_ISAR5_EL1, {}},
+ {"id_aa64pfr0_el1", ID_A64PFR0_EL1, {}},
+ {"id_aa64pfr1_el1", ID_A64PFR1_EL1, {}},
+ {"id_aa64dfr0_el1", ID_A64DFR0_EL1, {}},
+ {"id_aa64dfr1_el1", ID_A64DFR1_EL1, {}},
+ {"id_aa64afr0_el1", ID_A64AFR0_EL1, {}},
+ {"id_aa64afr1_el1", ID_A64AFR1_EL1, {}},
+ {"id_aa64isar0_el1", ID_A64ISAR0_EL1, {}},
+ {"id_aa64isar1_el1", ID_A64ISAR1_EL1, {}},
+ {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1, {}},
+ {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1, {}},
+ {"mvfr0_el1", MVFR0_EL1, {}},
+ {"mvfr1_el1", MVFR1_EL1, {}},
+ {"mvfr2_el1", MVFR2_EL1, {}},
+ {"rvbar_el1", RVBAR_EL1, {}},
+ {"rvbar_el2", RVBAR_EL2, {}},
+ {"rvbar_el3", RVBAR_EL3, {}},
+ {"isr_el1", ISR_EL1, {}},
+ {"cntpct_el0", CNTPCT_EL0, {}},
+ {"cntvct_el0", CNTVCT_EL0, {}},
// Trace registers
- {"trcstatr", TRCSTATR},
- {"trcidr8", TRCIDR8},
- {"trcidr9", TRCIDR9},
- {"trcidr10", TRCIDR10},
- {"trcidr11", TRCIDR11},
- {"trcidr12", TRCIDR12},
- {"trcidr13", TRCIDR13},
- {"trcidr0", TRCIDR0},
- {"trcidr1", TRCIDR1},
- {"trcidr2", TRCIDR2},
- {"trcidr3", TRCIDR3},
- {"trcidr4", TRCIDR4},
- {"trcidr5", TRCIDR5},
- {"trcidr6", TRCIDR6},
- {"trcidr7", TRCIDR7},
- {"trcoslsr", TRCOSLSR},
- {"trcpdsr", TRCPDSR},
- {"trcdevaff0", TRCDEVAFF0},
- {"trcdevaff1", TRCDEVAFF1},
- {"trclsr", TRCLSR},
- {"trcauthstatus", TRCAUTHSTATUS},
- {"trcdevarch", TRCDEVARCH},
- {"trcdevid", TRCDEVID},
- {"trcdevtype", TRCDEVTYPE},
- {"trcpidr4", TRCPIDR4},
- {"trcpidr5", TRCPIDR5},
- {"trcpidr6", TRCPIDR6},
- {"trcpidr7", TRCPIDR7},
- {"trcpidr0", TRCPIDR0},
- {"trcpidr1", TRCPIDR1},
- {"trcpidr2", TRCPIDR2},
- {"trcpidr3", TRCPIDR3},
- {"trccidr0", TRCCIDR0},
- {"trccidr1", TRCCIDR1},
- {"trccidr2", TRCCIDR2},
- {"trccidr3", TRCCIDR3},
+ {"trcstatr", TRCSTATR, {}},
+ {"trcidr8", TRCIDR8, {}},
+ {"trcidr9", TRCIDR9, {}},
+ {"trcidr10", TRCIDR10, {}},
+ {"trcidr11", TRCIDR11, {}},
+ {"trcidr12", TRCIDR12, {}},
+ {"trcidr13", TRCIDR13, {}},
+ {"trcidr0", TRCIDR0, {}},
+ {"trcidr1", TRCIDR1, {}},
+ {"trcidr2", TRCIDR2, {}},
+ {"trcidr3", TRCIDR3, {}},
+ {"trcidr4", TRCIDR4, {}},
+ {"trcidr5", TRCIDR5, {}},
+ {"trcidr6", TRCIDR6, {}},
+ {"trcidr7", TRCIDR7, {}},
+ {"trcoslsr", TRCOSLSR, {}},
+ {"trcpdsr", TRCPDSR, {}},
+ {"trcdevaff0", TRCDEVAFF0, {}},
+ {"trcdevaff1", TRCDEVAFF1, {}},
+ {"trclsr", TRCLSR, {}},
+ {"trcauthstatus", TRCAUTHSTATUS, {}},
+ {"trcdevarch", TRCDEVARCH, {}},
+ {"trcdevid", TRCDEVID, {}},
+ {"trcdevtype", TRCDEVTYPE, {}},
+ {"trcpidr4", TRCPIDR4, {}},
+ {"trcpidr5", TRCPIDR5, {}},
+ {"trcpidr6", TRCPIDR6, {}},
+ {"trcpidr7", TRCPIDR7, {}},
+ {"trcpidr0", TRCPIDR0, {}},
+ {"trcpidr1", TRCPIDR1, {}},
+ {"trcpidr2", TRCPIDR2, {}},
+ {"trcpidr3", TRCPIDR3, {}},
+ {"trccidr0", TRCCIDR0, {}},
+ {"trccidr1", TRCCIDR1, {}},
+ {"trccidr2", TRCCIDR2, {}},
+ {"trccidr3", TRCCIDR3, {}},
// GICv3 registers
- {"icc_iar1_el1", ICC_IAR1_EL1},
- {"icc_iar0_el1", ICC_IAR0_EL1},
- {"icc_hppir1_el1", ICC_HPPIR1_EL1},
- {"icc_hppir0_el1", ICC_HPPIR0_EL1},
- {"icc_rpr_el1", ICC_RPR_EL1},
- {"ich_vtr_el2", ICH_VTR_EL2},
- {"ich_eisr_el2", ICH_EISR_EL2},
- {"ich_elsr_el2", ICH_ELSR_EL2}
+ {"icc_iar1_el1", ICC_IAR1_EL1, {}},
+ {"icc_iar0_el1", ICC_IAR0_EL1, {}},
+ {"icc_hppir1_el1", ICC_HPPIR1_EL1, {}},
+ {"icc_hppir0_el1", ICC_HPPIR0_EL1, {}},
+ {"icc_rpr_el1", ICC_RPR_EL1, {}},
+ {"ich_vtr_el2", ICH_VTR_EL2, {}},
+ {"ich_eisr_el2", ICH_EISR_EL2, {}},
+ {"ich_elsr_el2", ICH_ELSR_EL2, {}},
+
+ // v8.1a "Limited Ordering Regions" extension-specific system registers
+ {"lorid_el1", LORID_EL1, {AArch64::HasV8_1aOps}},
};
-AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits)
- : SysRegMapper(FeatureBits) {
- InstPairs = &MRSPairs[0];
- NumInstPairs = llvm::array_lengthof(MRSPairs);
+AArch64SysReg::MRSMapper::MRSMapper() {
+ InstMappings = &MRSMappings[0];
+ NumInstMappings = llvm::array_lengthof(MRSMappings);
}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = {
- {"dbgdtrtx_el0", DBGDTRTX_EL0},
- {"oslar_el1", OSLAR_EL1},
- {"pmswinc_el0", PMSWINC_EL0},
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = {
+ {"dbgdtrtx_el0", DBGDTRTX_EL0, {}},
+ {"oslar_el1", OSLAR_EL1, {}},
+ {"pmswinc_el0", PMSWINC_EL0, {}},
// Trace registers
- {"trcoslar", TRCOSLAR},
- {"trclar", TRCLAR},
+ {"trcoslar", TRCOSLAR, {}},
+ {"trclar", TRCLAR, {}},
// GICv3 registers
- {"icc_eoir1_el1", ICC_EOIR1_EL1},
- {"icc_eoir0_el1", ICC_EOIR0_EL1},
- {"icc_dir_el1", ICC_DIR_EL1},
- {"icc_sgi1r_el1", ICC_SGI1R_EL1},
- {"icc_asgi1r_el1", ICC_ASGI1R_EL1},
- {"icc_sgi0r_el1", ICC_SGI0R_EL1}
+ {"icc_eoir1_el1", ICC_EOIR1_EL1, {}},
+ {"icc_eoir0_el1", ICC_EOIR0_EL1, {}},
+ {"icc_dir_el1", ICC_DIR_EL1, {}},
+ {"icc_sgi1r_el1", ICC_SGI1R_EL1, {}},
+ {"icc_asgi1r_el1", ICC_ASGI1R_EL1, {}},
+ {"icc_sgi0r_el1", ICC_SGI0R_EL1, {}},
+
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ {"pan", PAN, {AArch64::HasV8_1aOps}},
};
-AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits)
- : SysRegMapper(FeatureBits) {
- InstPairs = &MSRPairs[0];
- NumInstPairs = llvm::array_lengthof(MSRPairs);
+AArch64SysReg::MSRMapper::MSRMapper() {
+ InstMappings = &MSRMappings[0];
+ NumInstMappings = llvm::array_lengthof(MSRMappings);
}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] = {
- {"osdtrrx_el1", OSDTRRX_EL1},
- {"osdtrtx_el1", OSDTRTX_EL1},
- {"teecr32_el1", TEECR32_EL1},
- {"mdccint_el1", MDCCINT_EL1},
- {"mdscr_el1", MDSCR_EL1},
- {"dbgdtr_el0", DBGDTR_EL0},
- {"oseccr_el1", OSECCR_EL1},
- {"dbgvcr32_el2", DBGVCR32_EL2},
- {"dbgbvr0_el1", DBGBVR0_EL1},
- {"dbgbvr1_el1", DBGBVR1_EL1},
- {"dbgbvr2_el1", DBGBVR2_EL1},
- {"dbgbvr3_el1", DBGBVR3_EL1},
- {"dbgbvr4_el1", DBGBVR4_EL1},
- {"dbgbvr5_el1", DBGBVR5_EL1},
- {"dbgbvr6_el1", DBGBVR6_EL1},
- {"dbgbvr7_el1", DBGBVR7_EL1},
- {"dbgbvr8_el1", DBGBVR8_EL1},
- {"dbgbvr9_el1", DBGBVR9_EL1},
- {"dbgbvr10_el1", DBGBVR10_EL1},
- {"dbgbvr11_el1", DBGBVR11_EL1},
- {"dbgbvr12_el1", DBGBVR12_EL1},
- {"dbgbvr13_el1", DBGBVR13_EL1},
- {"dbgbvr14_el1", DBGBVR14_EL1},
- {"dbgbvr15_el1", DBGBVR15_EL1},
- {"dbgbcr0_el1", DBGBCR0_EL1},
- {"dbgbcr1_el1", DBGBCR1_EL1},
- {"dbgbcr2_el1", DBGBCR2_EL1},
- {"dbgbcr3_el1", DBGBCR3_EL1},
- {"dbgbcr4_el1", DBGBCR4_EL1},
- {"dbgbcr5_el1", DBGBCR5_EL1},
- {"dbgbcr6_el1", DBGBCR6_EL1},
- {"dbgbcr7_el1", DBGBCR7_EL1},
- {"dbgbcr8_el1", DBGBCR8_EL1},
- {"dbgbcr9_el1", DBGBCR9_EL1},
- {"dbgbcr10_el1", DBGBCR10_EL1},
- {"dbgbcr11_el1", DBGBCR11_EL1},
- {"dbgbcr12_el1", DBGBCR12_EL1},
- {"dbgbcr13_el1", DBGBCR13_EL1},
- {"dbgbcr14_el1", DBGBCR14_EL1},
- {"dbgbcr15_el1", DBGBCR15_EL1},
- {"dbgwvr0_el1", DBGWVR0_EL1},
- {"dbgwvr1_el1", DBGWVR1_EL1},
- {"dbgwvr2_el1", DBGWVR2_EL1},
- {"dbgwvr3_el1", DBGWVR3_EL1},
- {"dbgwvr4_el1", DBGWVR4_EL1},
- {"dbgwvr5_el1", DBGWVR5_EL1},
- {"dbgwvr6_el1", DBGWVR6_EL1},
- {"dbgwvr7_el1", DBGWVR7_EL1},
- {"dbgwvr8_el1", DBGWVR8_EL1},
- {"dbgwvr9_el1", DBGWVR9_EL1},
- {"dbgwvr10_el1", DBGWVR10_EL1},
- {"dbgwvr11_el1", DBGWVR11_EL1},
- {"dbgwvr12_el1", DBGWVR12_EL1},
- {"dbgwvr13_el1", DBGWVR13_EL1},
- {"dbgwvr14_el1", DBGWVR14_EL1},
- {"dbgwvr15_el1", DBGWVR15_EL1},
- {"dbgwcr0_el1", DBGWCR0_EL1},
- {"dbgwcr1_el1", DBGWCR1_EL1},
- {"dbgwcr2_el1", DBGWCR2_EL1},
- {"dbgwcr3_el1", DBGWCR3_EL1},
- {"dbgwcr4_el1", DBGWCR4_EL1},
- {"dbgwcr5_el1", DBGWCR5_EL1},
- {"dbgwcr6_el1", DBGWCR6_EL1},
- {"dbgwcr7_el1", DBGWCR7_EL1},
- {"dbgwcr8_el1", DBGWCR8_EL1},
- {"dbgwcr9_el1", DBGWCR9_EL1},
- {"dbgwcr10_el1", DBGWCR10_EL1},
- {"dbgwcr11_el1", DBGWCR11_EL1},
- {"dbgwcr12_el1", DBGWCR12_EL1},
- {"dbgwcr13_el1", DBGWCR13_EL1},
- {"dbgwcr14_el1", DBGWCR14_EL1},
- {"dbgwcr15_el1", DBGWCR15_EL1},
- {"teehbr32_el1", TEEHBR32_EL1},
- {"osdlr_el1", OSDLR_EL1},
- {"dbgprcr_el1", DBGPRCR_EL1},
- {"dbgclaimset_el1", DBGCLAIMSET_EL1},
- {"dbgclaimclr_el1", DBGCLAIMCLR_EL1},
- {"csselr_el1", CSSELR_EL1},
- {"vpidr_el2", VPIDR_EL2},
- {"vmpidr_el2", VMPIDR_EL2},
- {"sctlr_el1", SCTLR_EL1},
- {"sctlr_el2", SCTLR_EL2},
- {"sctlr_el3", SCTLR_EL3},
- {"actlr_el1", ACTLR_EL1},
- {"actlr_el2", ACTLR_EL2},
- {"actlr_el3", ACTLR_EL3},
- {"cpacr_el1", CPACR_EL1},
- {"hcr_el2", HCR_EL2},
- {"scr_el3", SCR_EL3},
- {"mdcr_el2", MDCR_EL2},
- {"sder32_el3", SDER32_EL3},
- {"cptr_el2", CPTR_EL2},
- {"cptr_el3", CPTR_EL3},
- {"hstr_el2", HSTR_EL2},
- {"hacr_el2", HACR_EL2},
- {"mdcr_el3", MDCR_EL3},
- {"ttbr0_el1", TTBR0_EL1},
- {"ttbr0_el2", TTBR0_EL2},
- {"ttbr0_el3", TTBR0_EL3},
- {"ttbr1_el1", TTBR1_EL1},
- {"tcr_el1", TCR_EL1},
- {"tcr_el2", TCR_EL2},
- {"tcr_el3", TCR_EL3},
- {"vttbr_el2", VTTBR_EL2},
- {"vtcr_el2", VTCR_EL2},
- {"dacr32_el2", DACR32_EL2},
- {"spsr_el1", SPSR_EL1},
- {"spsr_el2", SPSR_EL2},
- {"spsr_el3", SPSR_EL3},
- {"elr_el1", ELR_EL1},
- {"elr_el2", ELR_EL2},
- {"elr_el3", ELR_EL3},
- {"sp_el0", SP_EL0},
- {"sp_el1", SP_EL1},
- {"sp_el2", SP_EL2},
- {"spsel", SPSel},
- {"nzcv", NZCV},
- {"daif", DAIF},
- {"currentel", CurrentEL},
- {"spsr_irq", SPSR_irq},
- {"spsr_abt", SPSR_abt},
- {"spsr_und", SPSR_und},
- {"spsr_fiq", SPSR_fiq},
- {"fpcr", FPCR},
- {"fpsr", FPSR},
- {"dspsr_el0", DSPSR_EL0},
- {"dlr_el0", DLR_EL0},
- {"ifsr32_el2", IFSR32_EL2},
- {"afsr0_el1", AFSR0_EL1},
- {"afsr0_el2", AFSR0_EL2},
- {"afsr0_el3", AFSR0_EL3},
- {"afsr1_el1", AFSR1_EL1},
- {"afsr1_el2", AFSR1_EL2},
- {"afsr1_el3", AFSR1_EL3},
- {"esr_el1", ESR_EL1},
- {"esr_el2", ESR_EL2},
- {"esr_el3", ESR_EL3},
- {"fpexc32_el2", FPEXC32_EL2},
- {"far_el1", FAR_EL1},
- {"far_el2", FAR_EL2},
- {"far_el3", FAR_EL3},
- {"hpfar_el2", HPFAR_EL2},
- {"par_el1", PAR_EL1},
- {"pmcr_el0", PMCR_EL0},
- {"pmcntenset_el0", PMCNTENSET_EL0},
- {"pmcntenclr_el0", PMCNTENCLR_EL0},
- {"pmovsclr_el0", PMOVSCLR_EL0},
- {"pmselr_el0", PMSELR_EL0},
- {"pmccntr_el0", PMCCNTR_EL0},
- {"pmxevtyper_el0", PMXEVTYPER_EL0},
- {"pmxevcntr_el0", PMXEVCNTR_EL0},
- {"pmuserenr_el0", PMUSERENR_EL0},
- {"pmintenset_el1", PMINTENSET_EL1},
- {"pmintenclr_el1", PMINTENCLR_EL1},
- {"pmovsset_el0", PMOVSSET_EL0},
- {"mair_el1", MAIR_EL1},
- {"mair_el2", MAIR_EL2},
- {"mair_el3", MAIR_EL3},
- {"amair_el1", AMAIR_EL1},
- {"amair_el2", AMAIR_EL2},
- {"amair_el3", AMAIR_EL3},
- {"vbar_el1", VBAR_EL1},
- {"vbar_el2", VBAR_EL2},
- {"vbar_el3", VBAR_EL3},
- {"rmr_el1", RMR_EL1},
- {"rmr_el2", RMR_EL2},
- {"rmr_el3", RMR_EL3},
- {"contextidr_el1", CONTEXTIDR_EL1},
- {"tpidr_el0", TPIDR_EL0},
- {"tpidr_el2", TPIDR_EL2},
- {"tpidr_el3", TPIDR_EL3},
- {"tpidrro_el0", TPIDRRO_EL0},
- {"tpidr_el1", TPIDR_EL1},
- {"cntfrq_el0", CNTFRQ_EL0},
- {"cntvoff_el2", CNTVOFF_EL2},
- {"cntkctl_el1", CNTKCTL_EL1},
- {"cnthctl_el2", CNTHCTL_EL2},
- {"cntp_tval_el0", CNTP_TVAL_EL0},
- {"cnthp_tval_el2", CNTHP_TVAL_EL2},
- {"cntps_tval_el1", CNTPS_TVAL_EL1},
- {"cntp_ctl_el0", CNTP_CTL_EL0},
- {"cnthp_ctl_el2", CNTHP_CTL_EL2},
- {"cntps_ctl_el1", CNTPS_CTL_EL1},
- {"cntp_cval_el0", CNTP_CVAL_EL0},
- {"cnthp_cval_el2", CNTHP_CVAL_EL2},
- {"cntps_cval_el1", CNTPS_CVAL_EL1},
- {"cntv_tval_el0", CNTV_TVAL_EL0},
- {"cntv_ctl_el0", CNTV_CTL_EL0},
- {"cntv_cval_el0", CNTV_CVAL_EL0},
- {"pmevcntr0_el0", PMEVCNTR0_EL0},
- {"pmevcntr1_el0", PMEVCNTR1_EL0},
- {"pmevcntr2_el0", PMEVCNTR2_EL0},
- {"pmevcntr3_el0", PMEVCNTR3_EL0},
- {"pmevcntr4_el0", PMEVCNTR4_EL0},
- {"pmevcntr5_el0", PMEVCNTR5_EL0},
- {"pmevcntr6_el0", PMEVCNTR6_EL0},
- {"pmevcntr7_el0", PMEVCNTR7_EL0},
- {"pmevcntr8_el0", PMEVCNTR8_EL0},
- {"pmevcntr9_el0", PMEVCNTR9_EL0},
- {"pmevcntr10_el0", PMEVCNTR10_EL0},
- {"pmevcntr11_el0", PMEVCNTR11_EL0},
- {"pmevcntr12_el0", PMEVCNTR12_EL0},
- {"pmevcntr13_el0", PMEVCNTR13_EL0},
- {"pmevcntr14_el0", PMEVCNTR14_EL0},
- {"pmevcntr15_el0", PMEVCNTR15_EL0},
- {"pmevcntr16_el0", PMEVCNTR16_EL0},
- {"pmevcntr17_el0", PMEVCNTR17_EL0},
- {"pmevcntr18_el0", PMEVCNTR18_EL0},
- {"pmevcntr19_el0", PMEVCNTR19_EL0},
- {"pmevcntr20_el0", PMEVCNTR20_EL0},
- {"pmevcntr21_el0", PMEVCNTR21_EL0},
- {"pmevcntr22_el0", PMEVCNTR22_EL0},
- {"pmevcntr23_el0", PMEVCNTR23_EL0},
- {"pmevcntr24_el0", PMEVCNTR24_EL0},
- {"pmevcntr25_el0", PMEVCNTR25_EL0},
- {"pmevcntr26_el0", PMEVCNTR26_EL0},
- {"pmevcntr27_el0", PMEVCNTR27_EL0},
- {"pmevcntr28_el0", PMEVCNTR28_EL0},
- {"pmevcntr29_el0", PMEVCNTR29_EL0},
- {"pmevcntr30_el0", PMEVCNTR30_EL0},
- {"pmccfiltr_el0", PMCCFILTR_EL0},
- {"pmevtyper0_el0", PMEVTYPER0_EL0},
- {"pmevtyper1_el0", PMEVTYPER1_EL0},
- {"pmevtyper2_el0", PMEVTYPER2_EL0},
- {"pmevtyper3_el0", PMEVTYPER3_EL0},
- {"pmevtyper4_el0", PMEVTYPER4_EL0},
- {"pmevtyper5_el0", PMEVTYPER5_EL0},
- {"pmevtyper6_el0", PMEVTYPER6_EL0},
- {"pmevtyper7_el0", PMEVTYPER7_EL0},
- {"pmevtyper8_el0", PMEVTYPER8_EL0},
- {"pmevtyper9_el0", PMEVTYPER9_EL0},
- {"pmevtyper10_el0", PMEVTYPER10_EL0},
- {"pmevtyper11_el0", PMEVTYPER11_EL0},
- {"pmevtyper12_el0", PMEVTYPER12_EL0},
- {"pmevtyper13_el0", PMEVTYPER13_EL0},
- {"pmevtyper14_el0", PMEVTYPER14_EL0},
- {"pmevtyper15_el0", PMEVTYPER15_EL0},
- {"pmevtyper16_el0", PMEVTYPER16_EL0},
- {"pmevtyper17_el0", PMEVTYPER17_EL0},
- {"pmevtyper18_el0", PMEVTYPER18_EL0},
- {"pmevtyper19_el0", PMEVTYPER19_EL0},
- {"pmevtyper20_el0", PMEVTYPER20_EL0},
- {"pmevtyper21_el0", PMEVTYPER21_EL0},
- {"pmevtyper22_el0", PMEVTYPER22_EL0},
- {"pmevtyper23_el0", PMEVTYPER23_EL0},
- {"pmevtyper24_el0", PMEVTYPER24_EL0},
- {"pmevtyper25_el0", PMEVTYPER25_EL0},
- {"pmevtyper26_el0", PMEVTYPER26_EL0},
- {"pmevtyper27_el0", PMEVTYPER27_EL0},
- {"pmevtyper28_el0", PMEVTYPER28_EL0},
- {"pmevtyper29_el0", PMEVTYPER29_EL0},
- {"pmevtyper30_el0", PMEVTYPER30_EL0},
+const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = {
+ {"osdtrrx_el1", OSDTRRX_EL1, {}},
+ {"osdtrtx_el1", OSDTRTX_EL1, {}},
+ {"teecr32_el1", TEECR32_EL1, {}},
+ {"mdccint_el1", MDCCINT_EL1, {}},
+ {"mdscr_el1", MDSCR_EL1, {}},
+ {"dbgdtr_el0", DBGDTR_EL0, {}},
+ {"oseccr_el1", OSECCR_EL1, {}},
+ {"dbgvcr32_el2", DBGVCR32_EL2, {}},
+ {"dbgbvr0_el1", DBGBVR0_EL1, {}},
+ {"dbgbvr1_el1", DBGBVR1_EL1, {}},
+ {"dbgbvr2_el1", DBGBVR2_EL1, {}},
+ {"dbgbvr3_el1", DBGBVR3_EL1, {}},
+ {"dbgbvr4_el1", DBGBVR4_EL1, {}},
+ {"dbgbvr5_el1", DBGBVR5_EL1, {}},
+ {"dbgbvr6_el1", DBGBVR6_EL1, {}},
+ {"dbgbvr7_el1", DBGBVR7_EL1, {}},
+ {"dbgbvr8_el1", DBGBVR8_EL1, {}},
+ {"dbgbvr9_el1", DBGBVR9_EL1, {}},
+ {"dbgbvr10_el1", DBGBVR10_EL1, {}},
+ {"dbgbvr11_el1", DBGBVR11_EL1, {}},
+ {"dbgbvr12_el1", DBGBVR12_EL1, {}},
+ {"dbgbvr13_el1", DBGBVR13_EL1, {}},
+ {"dbgbvr14_el1", DBGBVR14_EL1, {}},
+ {"dbgbvr15_el1", DBGBVR15_EL1, {}},
+ {"dbgbcr0_el1", DBGBCR0_EL1, {}},
+ {"dbgbcr1_el1", DBGBCR1_EL1, {}},
+ {"dbgbcr2_el1", DBGBCR2_EL1, {}},
+ {"dbgbcr3_el1", DBGBCR3_EL1, {}},
+ {"dbgbcr4_el1", DBGBCR4_EL1, {}},
+ {"dbgbcr5_el1", DBGBCR5_EL1, {}},
+ {"dbgbcr6_el1", DBGBCR6_EL1, {}},
+ {"dbgbcr7_el1", DBGBCR7_EL1, {}},
+ {"dbgbcr8_el1", DBGBCR8_EL1, {}},
+ {"dbgbcr9_el1", DBGBCR9_EL1, {}},
+ {"dbgbcr10_el1", DBGBCR10_EL1, {}},
+ {"dbgbcr11_el1", DBGBCR11_EL1, {}},
+ {"dbgbcr12_el1", DBGBCR12_EL1, {}},
+ {"dbgbcr13_el1", DBGBCR13_EL1, {}},
+ {"dbgbcr14_el1", DBGBCR14_EL1, {}},
+ {"dbgbcr15_el1", DBGBCR15_EL1, {}},
+ {"dbgwvr0_el1", DBGWVR0_EL1, {}},
+ {"dbgwvr1_el1", DBGWVR1_EL1, {}},
+ {"dbgwvr2_el1", DBGWVR2_EL1, {}},
+ {"dbgwvr3_el1", DBGWVR3_EL1, {}},
+ {"dbgwvr4_el1", DBGWVR4_EL1, {}},
+ {"dbgwvr5_el1", DBGWVR5_EL1, {}},
+ {"dbgwvr6_el1", DBGWVR6_EL1, {}},
+ {"dbgwvr7_el1", DBGWVR7_EL1, {}},
+ {"dbgwvr8_el1", DBGWVR8_EL1, {}},
+ {"dbgwvr9_el1", DBGWVR9_EL1, {}},
+ {"dbgwvr10_el1", DBGWVR10_EL1, {}},
+ {"dbgwvr11_el1", DBGWVR11_EL1, {}},
+ {"dbgwvr12_el1", DBGWVR12_EL1, {}},
+ {"dbgwvr13_el1", DBGWVR13_EL1, {}},
+ {"dbgwvr14_el1", DBGWVR14_EL1, {}},
+ {"dbgwvr15_el1", DBGWVR15_EL1, {}},
+ {"dbgwcr0_el1", DBGWCR0_EL1, {}},
+ {"dbgwcr1_el1", DBGWCR1_EL1, {}},
+ {"dbgwcr2_el1", DBGWCR2_EL1, {}},
+ {"dbgwcr3_el1", DBGWCR3_EL1, {}},
+ {"dbgwcr4_el1", DBGWCR4_EL1, {}},
+ {"dbgwcr5_el1", DBGWCR5_EL1, {}},
+ {"dbgwcr6_el1", DBGWCR6_EL1, {}},
+ {"dbgwcr7_el1", DBGWCR7_EL1, {}},
+ {"dbgwcr8_el1", DBGWCR8_EL1, {}},
+ {"dbgwcr9_el1", DBGWCR9_EL1, {}},
+ {"dbgwcr10_el1", DBGWCR10_EL1, {}},
+ {"dbgwcr11_el1", DBGWCR11_EL1, {}},
+ {"dbgwcr12_el1", DBGWCR12_EL1, {}},
+ {"dbgwcr13_el1", DBGWCR13_EL1, {}},
+ {"dbgwcr14_el1", DBGWCR14_EL1, {}},
+ {"dbgwcr15_el1", DBGWCR15_EL1, {}},
+ {"teehbr32_el1", TEEHBR32_EL1, {}},
+ {"osdlr_el1", OSDLR_EL1, {}},
+ {"dbgprcr_el1", DBGPRCR_EL1, {}},
+ {"dbgclaimset_el1", DBGCLAIMSET_EL1, {}},
+ {"dbgclaimclr_el1", DBGCLAIMCLR_EL1, {}},
+ {"csselr_el1", CSSELR_EL1, {}},
+ {"vpidr_el2", VPIDR_EL2, {}},
+ {"vmpidr_el2", VMPIDR_EL2, {}},
+ {"sctlr_el1", SCTLR_EL1, {}},
+ {"sctlr_el2", SCTLR_EL2, {}},
+ {"sctlr_el3", SCTLR_EL3, {}},
+ {"actlr_el1", ACTLR_EL1, {}},
+ {"actlr_el2", ACTLR_EL2, {}},
+ {"actlr_el3", ACTLR_EL3, {}},
+ {"cpacr_el1", CPACR_EL1, {}},
+ {"hcr_el2", HCR_EL2, {}},
+ {"scr_el3", SCR_EL3, {}},
+ {"mdcr_el2", MDCR_EL2, {}},
+ {"sder32_el3", SDER32_EL3, {}},
+ {"cptr_el2", CPTR_EL2, {}},
+ {"cptr_el3", CPTR_EL3, {}},
+ {"hstr_el2", HSTR_EL2, {}},
+ {"hacr_el2", HACR_EL2, {}},
+ {"mdcr_el3", MDCR_EL3, {}},
+ {"ttbr0_el1", TTBR0_EL1, {}},
+ {"ttbr0_el2", TTBR0_EL2, {}},
+ {"ttbr0_el3", TTBR0_EL3, {}},
+ {"ttbr1_el1", TTBR1_EL1, {}},
+ {"tcr_el1", TCR_EL1, {}},
+ {"tcr_el2", TCR_EL2, {}},
+ {"tcr_el3", TCR_EL3, {}},
+ {"vttbr_el2", VTTBR_EL2, {}},
+ {"vtcr_el2", VTCR_EL2, {}},
+ {"dacr32_el2", DACR32_EL2, {}},
+ {"spsr_el1", SPSR_EL1, {}},
+ {"spsr_el2", SPSR_EL2, {}},
+ {"spsr_el3", SPSR_EL3, {}},
+ {"elr_el1", ELR_EL1, {}},
+ {"elr_el2", ELR_EL2, {}},
+ {"elr_el3", ELR_EL3, {}},
+ {"sp_el0", SP_EL0, {}},
+ {"sp_el1", SP_EL1, {}},
+ {"sp_el2", SP_EL2, {}},
+ {"spsel", SPSel, {}},
+ {"nzcv", NZCV, {}},
+ {"daif", DAIF, {}},
+ {"currentel", CurrentEL, {}},
+ {"spsr_irq", SPSR_irq, {}},
+ {"spsr_abt", SPSR_abt, {}},
+ {"spsr_und", SPSR_und, {}},
+ {"spsr_fiq", SPSR_fiq, {}},
+ {"fpcr", FPCR, {}},
+ {"fpsr", FPSR, {}},
+ {"dspsr_el0", DSPSR_EL0, {}},
+ {"dlr_el0", DLR_EL0, {}},
+ {"ifsr32_el2", IFSR32_EL2, {}},
+ {"afsr0_el1", AFSR0_EL1, {}},
+ {"afsr0_el2", AFSR0_EL2, {}},
+ {"afsr0_el3", AFSR0_EL3, {}},
+ {"afsr1_el1", AFSR1_EL1, {}},
+ {"afsr1_el2", AFSR1_EL2, {}},
+ {"afsr1_el3", AFSR1_EL3, {}},
+ {"esr_el1", ESR_EL1, {}},
+ {"esr_el2", ESR_EL2, {}},
+ {"esr_el3", ESR_EL3, {}},
+ {"fpexc32_el2", FPEXC32_EL2, {}},
+ {"far_el1", FAR_EL1, {}},
+ {"far_el2", FAR_EL2, {}},
+ {"far_el3", FAR_EL3, {}},
+ {"hpfar_el2", HPFAR_EL2, {}},
+ {"par_el1", PAR_EL1, {}},
+ {"pmcr_el0", PMCR_EL0, {}},
+ {"pmcntenset_el0", PMCNTENSET_EL0, {}},
+ {"pmcntenclr_el0", PMCNTENCLR_EL0, {}},
+ {"pmovsclr_el0", PMOVSCLR_EL0, {}},
+ {"pmselr_el0", PMSELR_EL0, {}},
+ {"pmccntr_el0", PMCCNTR_EL0, {}},
+ {"pmxevtyper_el0", PMXEVTYPER_EL0, {}},
+ {"pmxevcntr_el0", PMXEVCNTR_EL0, {}},
+ {"pmuserenr_el0", PMUSERENR_EL0, {}},
+ {"pmintenset_el1", PMINTENSET_EL1, {}},
+ {"pmintenclr_el1", PMINTENCLR_EL1, {}},
+ {"pmovsset_el0", PMOVSSET_EL0, {}},
+ {"mair_el1", MAIR_EL1, {}},
+ {"mair_el2", MAIR_EL2, {}},
+ {"mair_el3", MAIR_EL3, {}},
+ {"amair_el1", AMAIR_EL1, {}},
+ {"amair_el2", AMAIR_EL2, {}},
+ {"amair_el3", AMAIR_EL3, {}},
+ {"vbar_el1", VBAR_EL1, {}},
+ {"vbar_el2", VBAR_EL2, {}},
+ {"vbar_el3", VBAR_EL3, {}},
+ {"rmr_el1", RMR_EL1, {}},
+ {"rmr_el2", RMR_EL2, {}},
+ {"rmr_el3", RMR_EL3, {}},
+ {"contextidr_el1", CONTEXTIDR_EL1, {}},
+ {"tpidr_el0", TPIDR_EL0, {}},
+ {"tpidr_el2", TPIDR_EL2, {}},
+ {"tpidr_el3", TPIDR_EL3, {}},
+ {"tpidrro_el0", TPIDRRO_EL0, {}},
+ {"tpidr_el1", TPIDR_EL1, {}},
+ {"cntfrq_el0", CNTFRQ_EL0, {}},
+ {"cntvoff_el2", CNTVOFF_EL2, {}},
+ {"cntkctl_el1", CNTKCTL_EL1, {}},
+ {"cnthctl_el2", CNTHCTL_EL2, {}},
+ {"cntp_tval_el0", CNTP_TVAL_EL0, {}},
+ {"cnthp_tval_el2", CNTHP_TVAL_EL2, {}},
+ {"cntps_tval_el1", CNTPS_TVAL_EL1, {}},
+ {"cntp_ctl_el0", CNTP_CTL_EL0, {}},
+ {"cnthp_ctl_el2", CNTHP_CTL_EL2, {}},
+ {"cntps_ctl_el1", CNTPS_CTL_EL1, {}},
+ {"cntp_cval_el0", CNTP_CVAL_EL0, {}},
+ {"cnthp_cval_el2", CNTHP_CVAL_EL2, {}},
+ {"cntps_cval_el1", CNTPS_CVAL_EL1, {}},
+ {"cntv_tval_el0", CNTV_TVAL_EL0, {}},
+ {"cntv_ctl_el0", CNTV_CTL_EL0, {}},
+ {"cntv_cval_el0", CNTV_CVAL_EL0, {}},
+ {"pmevcntr0_el0", PMEVCNTR0_EL0, {}},
+ {"pmevcntr1_el0", PMEVCNTR1_EL0, {}},
+ {"pmevcntr2_el0", PMEVCNTR2_EL0, {}},
+ {"pmevcntr3_el0", PMEVCNTR3_EL0, {}},
+ {"pmevcntr4_el0", PMEVCNTR4_EL0, {}},
+ {"pmevcntr5_el0", PMEVCNTR5_EL0, {}},
+ {"pmevcntr6_el0", PMEVCNTR6_EL0, {}},
+ {"pmevcntr7_el0", PMEVCNTR7_EL0, {}},
+ {"pmevcntr8_el0", PMEVCNTR8_EL0, {}},
+ {"pmevcntr9_el0", PMEVCNTR9_EL0, {}},
+ {"pmevcntr10_el0", PMEVCNTR10_EL0, {}},
+ {"pmevcntr11_el0", PMEVCNTR11_EL0, {}},
+ {"pmevcntr12_el0", PMEVCNTR12_EL0, {}},
+ {"pmevcntr13_el0", PMEVCNTR13_EL0, {}},
+ {"pmevcntr14_el0", PMEVCNTR14_EL0, {}},
+ {"pmevcntr15_el0", PMEVCNTR15_EL0, {}},
+ {"pmevcntr16_el0", PMEVCNTR16_EL0, {}},
+ {"pmevcntr17_el0", PMEVCNTR17_EL0, {}},
+ {"pmevcntr18_el0", PMEVCNTR18_EL0, {}},
+ {"pmevcntr19_el0", PMEVCNTR19_EL0, {}},
+ {"pmevcntr20_el0", PMEVCNTR20_EL0, {}},
+ {"pmevcntr21_el0", PMEVCNTR21_EL0, {}},
+ {"pmevcntr22_el0", PMEVCNTR22_EL0, {}},
+ {"pmevcntr23_el0", PMEVCNTR23_EL0, {}},
+ {"pmevcntr24_el0", PMEVCNTR24_EL0, {}},
+ {"pmevcntr25_el0", PMEVCNTR25_EL0, {}},
+ {"pmevcntr26_el0", PMEVCNTR26_EL0, {}},
+ {"pmevcntr27_el0", PMEVCNTR27_EL0, {}},
+ {"pmevcntr28_el0", PMEVCNTR28_EL0, {}},
+ {"pmevcntr29_el0", PMEVCNTR29_EL0, {}},
+ {"pmevcntr30_el0", PMEVCNTR30_EL0, {}},
+ {"pmccfiltr_el0", PMCCFILTR_EL0, {}},
+ {"pmevtyper0_el0", PMEVTYPER0_EL0, {}},
+ {"pmevtyper1_el0", PMEVTYPER1_EL0, {}},
+ {"pmevtyper2_el0", PMEVTYPER2_EL0, {}},
+ {"pmevtyper3_el0", PMEVTYPER3_EL0, {}},
+ {"pmevtyper4_el0", PMEVTYPER4_EL0, {}},
+ {"pmevtyper5_el0", PMEVTYPER5_EL0, {}},
+ {"pmevtyper6_el0", PMEVTYPER6_EL0, {}},
+ {"pmevtyper7_el0", PMEVTYPER7_EL0, {}},
+ {"pmevtyper8_el0", PMEVTYPER8_EL0, {}},
+ {"pmevtyper9_el0", PMEVTYPER9_EL0, {}},
+ {"pmevtyper10_el0", PMEVTYPER10_EL0, {}},
+ {"pmevtyper11_el0", PMEVTYPER11_EL0, {}},
+ {"pmevtyper12_el0", PMEVTYPER12_EL0, {}},
+ {"pmevtyper13_el0", PMEVTYPER13_EL0, {}},
+ {"pmevtyper14_el0", PMEVTYPER14_EL0, {}},
+ {"pmevtyper15_el0", PMEVTYPER15_EL0, {}},
+ {"pmevtyper16_el0", PMEVTYPER16_EL0, {}},
+ {"pmevtyper17_el0", PMEVTYPER17_EL0, {}},
+ {"pmevtyper18_el0", PMEVTYPER18_EL0, {}},
+ {"pmevtyper19_el0", PMEVTYPER19_EL0, {}},
+ {"pmevtyper20_el0", PMEVTYPER20_EL0, {}},
+ {"pmevtyper21_el0", PMEVTYPER21_EL0, {}},
+ {"pmevtyper22_el0", PMEVTYPER22_EL0, {}},
+ {"pmevtyper23_el0", PMEVTYPER23_EL0, {}},
+ {"pmevtyper24_el0", PMEVTYPER24_EL0, {}},
+ {"pmevtyper25_el0", PMEVTYPER25_EL0, {}},
+ {"pmevtyper26_el0", PMEVTYPER26_EL0, {}},
+ {"pmevtyper27_el0", PMEVTYPER27_EL0, {}},
+ {"pmevtyper28_el0", PMEVTYPER28_EL0, {}},
+ {"pmevtyper29_el0", PMEVTYPER29_EL0, {}},
+ {"pmevtyper30_el0", PMEVTYPER30_EL0, {}},
// Trace registers
- {"trcprgctlr", TRCPRGCTLR},
- {"trcprocselr", TRCPROCSELR},
- {"trcconfigr", TRCCONFIGR},
- {"trcauxctlr", TRCAUXCTLR},
- {"trceventctl0r", TRCEVENTCTL0R},
- {"trceventctl1r", TRCEVENTCTL1R},
- {"trcstallctlr", TRCSTALLCTLR},
- {"trctsctlr", TRCTSCTLR},
- {"trcsyncpr", TRCSYNCPR},
- {"trcccctlr", TRCCCCTLR},
- {"trcbbctlr", TRCBBCTLR},
- {"trctraceidr", TRCTRACEIDR},
- {"trcqctlr", TRCQCTLR},
- {"trcvictlr", TRCVICTLR},
- {"trcviiectlr", TRCVIIECTLR},
- {"trcvissctlr", TRCVISSCTLR},
- {"trcvipcssctlr", TRCVIPCSSCTLR},
- {"trcvdctlr", TRCVDCTLR},
- {"trcvdsacctlr", TRCVDSACCTLR},
- {"trcvdarcctlr", TRCVDARCCTLR},
- {"trcseqevr0", TRCSEQEVR0},
- {"trcseqevr1", TRCSEQEVR1},
- {"trcseqevr2", TRCSEQEVR2},
- {"trcseqrstevr", TRCSEQRSTEVR},
- {"trcseqstr", TRCSEQSTR},
- {"trcextinselr", TRCEXTINSELR},
- {"trccntrldvr0", TRCCNTRLDVR0},
- {"trccntrldvr1", TRCCNTRLDVR1},
- {"trccntrldvr2", TRCCNTRLDVR2},
- {"trccntrldvr3", TRCCNTRLDVR3},
- {"trccntctlr0", TRCCNTCTLR0},
- {"trccntctlr1", TRCCNTCTLR1},
- {"trccntctlr2", TRCCNTCTLR2},
- {"trccntctlr3", TRCCNTCTLR3},
- {"trccntvr0", TRCCNTVR0},
- {"trccntvr1", TRCCNTVR1},
- {"trccntvr2", TRCCNTVR2},
- {"trccntvr3", TRCCNTVR3},
- {"trcimspec0", TRCIMSPEC0},
- {"trcimspec1", TRCIMSPEC1},
- {"trcimspec2", TRCIMSPEC2},
- {"trcimspec3", TRCIMSPEC3},
- {"trcimspec4", TRCIMSPEC4},
- {"trcimspec5", TRCIMSPEC5},
- {"trcimspec6", TRCIMSPEC6},
- {"trcimspec7", TRCIMSPEC7},
- {"trcrsctlr2", TRCRSCTLR2},
- {"trcrsctlr3", TRCRSCTLR3},
- {"trcrsctlr4", TRCRSCTLR4},
- {"trcrsctlr5", TRCRSCTLR5},
- {"trcrsctlr6", TRCRSCTLR6},
- {"trcrsctlr7", TRCRSCTLR7},
- {"trcrsctlr8", TRCRSCTLR8},
- {"trcrsctlr9", TRCRSCTLR9},
- {"trcrsctlr10", TRCRSCTLR10},
- {"trcrsctlr11", TRCRSCTLR11},
- {"trcrsctlr12", TRCRSCTLR12},
- {"trcrsctlr13", TRCRSCTLR13},
- {"trcrsctlr14", TRCRSCTLR14},
- {"trcrsctlr15", TRCRSCTLR15},
- {"trcrsctlr16", TRCRSCTLR16},
- {"trcrsctlr17", TRCRSCTLR17},
- {"trcrsctlr18", TRCRSCTLR18},
- {"trcrsctlr19", TRCRSCTLR19},
- {"trcrsctlr20", TRCRSCTLR20},
- {"trcrsctlr21", TRCRSCTLR21},
- {"trcrsctlr22", TRCRSCTLR22},
- {"trcrsctlr23", TRCRSCTLR23},
- {"trcrsctlr24", TRCRSCTLR24},
- {"trcrsctlr25", TRCRSCTLR25},
- {"trcrsctlr26", TRCRSCTLR26},
- {"trcrsctlr27", TRCRSCTLR27},
- {"trcrsctlr28", TRCRSCTLR28},
- {"trcrsctlr29", TRCRSCTLR29},
- {"trcrsctlr30", TRCRSCTLR30},
- {"trcrsctlr31", TRCRSCTLR31},
- {"trcssccr0", TRCSSCCR0},
- {"trcssccr1", TRCSSCCR1},
- {"trcssccr2", TRCSSCCR2},
- {"trcssccr3", TRCSSCCR3},
- {"trcssccr4", TRCSSCCR4},
- {"trcssccr5", TRCSSCCR5},
- {"trcssccr6", TRCSSCCR6},
- {"trcssccr7", TRCSSCCR7},
- {"trcsscsr0", TRCSSCSR0},
- {"trcsscsr1", TRCSSCSR1},
- {"trcsscsr2", TRCSSCSR2},
- {"trcsscsr3", TRCSSCSR3},
- {"trcsscsr4", TRCSSCSR4},
- {"trcsscsr5", TRCSSCSR5},
- {"trcsscsr6", TRCSSCSR6},
- {"trcsscsr7", TRCSSCSR7},
- {"trcsspcicr0", TRCSSPCICR0},
- {"trcsspcicr1", TRCSSPCICR1},
- {"trcsspcicr2", TRCSSPCICR2},
- {"trcsspcicr3", TRCSSPCICR3},
- {"trcsspcicr4", TRCSSPCICR4},
- {"trcsspcicr5", TRCSSPCICR5},
- {"trcsspcicr6", TRCSSPCICR6},
- {"trcsspcicr7", TRCSSPCICR7},
- {"trcpdcr", TRCPDCR},
- {"trcacvr0", TRCACVR0},
- {"trcacvr1", TRCACVR1},
- {"trcacvr2", TRCACVR2},
- {"trcacvr3", TRCACVR3},
- {"trcacvr4", TRCACVR4},
- {"trcacvr5", TRCACVR5},
- {"trcacvr6", TRCACVR6},
- {"trcacvr7", TRCACVR7},
- {"trcacvr8", TRCACVR8},
- {"trcacvr9", TRCACVR9},
- {"trcacvr10", TRCACVR10},
- {"trcacvr11", TRCACVR11},
- {"trcacvr12", TRCACVR12},
- {"trcacvr13", TRCACVR13},
- {"trcacvr14", TRCACVR14},
- {"trcacvr15", TRCACVR15},
- {"trcacatr0", TRCACATR0},
- {"trcacatr1", TRCACATR1},
- {"trcacatr2", TRCACATR2},
- {"trcacatr3", TRCACATR3},
- {"trcacatr4", TRCACATR4},
- {"trcacatr5", TRCACATR5},
- {"trcacatr6", TRCACATR6},
- {"trcacatr7", TRCACATR7},
- {"trcacatr8", TRCACATR8},
- {"trcacatr9", TRCACATR9},
- {"trcacatr10", TRCACATR10},
- {"trcacatr11", TRCACATR11},
- {"trcacatr12", TRCACATR12},
- {"trcacatr13", TRCACATR13},
- {"trcacatr14", TRCACATR14},
- {"trcacatr15", TRCACATR15},
- {"trcdvcvr0", TRCDVCVR0},
- {"trcdvcvr1", TRCDVCVR1},
- {"trcdvcvr2", TRCDVCVR2},
- {"trcdvcvr3", TRCDVCVR3},
- {"trcdvcvr4", TRCDVCVR4},
- {"trcdvcvr5", TRCDVCVR5},
- {"trcdvcvr6", TRCDVCVR6},
- {"trcdvcvr7", TRCDVCVR7},
- {"trcdvcmr0", TRCDVCMR0},
- {"trcdvcmr1", TRCDVCMR1},
- {"trcdvcmr2", TRCDVCMR2},
- {"trcdvcmr3", TRCDVCMR3},
- {"trcdvcmr4", TRCDVCMR4},
- {"trcdvcmr5", TRCDVCMR5},
- {"trcdvcmr6", TRCDVCMR6},
- {"trcdvcmr7", TRCDVCMR7},
- {"trccidcvr0", TRCCIDCVR0},
- {"trccidcvr1", TRCCIDCVR1},
- {"trccidcvr2", TRCCIDCVR2},
- {"trccidcvr3", TRCCIDCVR3},
- {"trccidcvr4", TRCCIDCVR4},
- {"trccidcvr5", TRCCIDCVR5},
- {"trccidcvr6", TRCCIDCVR6},
- {"trccidcvr7", TRCCIDCVR7},
- {"trcvmidcvr0", TRCVMIDCVR0},
- {"trcvmidcvr1", TRCVMIDCVR1},
- {"trcvmidcvr2", TRCVMIDCVR2},
- {"trcvmidcvr3", TRCVMIDCVR3},
- {"trcvmidcvr4", TRCVMIDCVR4},
- {"trcvmidcvr5", TRCVMIDCVR5},
- {"trcvmidcvr6", TRCVMIDCVR6},
- {"trcvmidcvr7", TRCVMIDCVR7},
- {"trccidcctlr0", TRCCIDCCTLR0},
- {"trccidcctlr1", TRCCIDCCTLR1},
- {"trcvmidcctlr0", TRCVMIDCCTLR0},
- {"trcvmidcctlr1", TRCVMIDCCTLR1},
- {"trcitctrl", TRCITCTRL},
- {"trcclaimset", TRCCLAIMSET},
- {"trcclaimclr", TRCCLAIMCLR},
+ {"trcprgctlr", TRCPRGCTLR, {}},
+ {"trcprocselr", TRCPROCSELR, {}},
+ {"trcconfigr", TRCCONFIGR, {}},
+ {"trcauxctlr", TRCAUXCTLR, {}},
+ {"trceventctl0r", TRCEVENTCTL0R, {}},
+ {"trceventctl1r", TRCEVENTCTL1R, {}},
+ {"trcstallctlr", TRCSTALLCTLR, {}},
+ {"trctsctlr", TRCTSCTLR, {}},
+ {"trcsyncpr", TRCSYNCPR, {}},
+ {"trcccctlr", TRCCCCTLR, {}},
+ {"trcbbctlr", TRCBBCTLR, {}},
+ {"trctraceidr", TRCTRACEIDR, {}},
+ {"trcqctlr", TRCQCTLR, {}},
+ {"trcvictlr", TRCVICTLR, {}},
+ {"trcviiectlr", TRCVIIECTLR, {}},
+ {"trcvissctlr", TRCVISSCTLR, {}},
+ {"trcvipcssctlr", TRCVIPCSSCTLR, {}},
+ {"trcvdctlr", TRCVDCTLR, {}},
+ {"trcvdsacctlr", TRCVDSACCTLR, {}},
+ {"trcvdarcctlr", TRCVDARCCTLR, {}},
+ {"trcseqevr0", TRCSEQEVR0, {}},
+ {"trcseqevr1", TRCSEQEVR1, {}},
+ {"trcseqevr2", TRCSEQEVR2, {}},
+ {"trcseqrstevr", TRCSEQRSTEVR, {}},
+ {"trcseqstr", TRCSEQSTR, {}},
+ {"trcextinselr", TRCEXTINSELR, {}},
+ {"trccntrldvr0", TRCCNTRLDVR0, {}},
+ {"trccntrldvr1", TRCCNTRLDVR1, {}},
+ {"trccntrldvr2", TRCCNTRLDVR2, {}},
+ {"trccntrldvr3", TRCCNTRLDVR3, {}},
+ {"trccntctlr0", TRCCNTCTLR0, {}},
+ {"trccntctlr1", TRCCNTCTLR1, {}},
+ {"trccntctlr2", TRCCNTCTLR2, {}},
+ {"trccntctlr3", TRCCNTCTLR3, {}},
+ {"trccntvr0", TRCCNTVR0, {}},
+ {"trccntvr1", TRCCNTVR1, {}},
+ {"trccntvr2", TRCCNTVR2, {}},
+ {"trccntvr3", TRCCNTVR3, {}},
+ {"trcimspec0", TRCIMSPEC0, {}},
+ {"trcimspec1", TRCIMSPEC1, {}},
+ {"trcimspec2", TRCIMSPEC2, {}},
+ {"trcimspec3", TRCIMSPEC3, {}},
+ {"trcimspec4", TRCIMSPEC4, {}},
+ {"trcimspec5", TRCIMSPEC5, {}},
+ {"trcimspec6", TRCIMSPEC6, {}},
+ {"trcimspec7", TRCIMSPEC7, {}},
+ {"trcrsctlr2", TRCRSCTLR2, {}},
+ {"trcrsctlr3", TRCRSCTLR3, {}},
+ {"trcrsctlr4", TRCRSCTLR4, {}},
+ {"trcrsctlr5", TRCRSCTLR5, {}},
+ {"trcrsctlr6", TRCRSCTLR6, {}},
+ {"trcrsctlr7", TRCRSCTLR7, {}},
+ {"trcrsctlr8", TRCRSCTLR8, {}},
+ {"trcrsctlr9", TRCRSCTLR9, {}},
+ {"trcrsctlr10", TRCRSCTLR10, {}},
+ {"trcrsctlr11", TRCRSCTLR11, {}},
+ {"trcrsctlr12", TRCRSCTLR12, {}},
+ {"trcrsctlr13", TRCRSCTLR13, {}},
+ {"trcrsctlr14", TRCRSCTLR14, {}},
+ {"trcrsctlr15", TRCRSCTLR15, {}},
+ {"trcrsctlr16", TRCRSCTLR16, {}},
+ {"trcrsctlr17", TRCRSCTLR17, {}},
+ {"trcrsctlr18", TRCRSCTLR18, {}},
+ {"trcrsctlr19", TRCRSCTLR19, {}},
+ {"trcrsctlr20", TRCRSCTLR20, {}},
+ {"trcrsctlr21", TRCRSCTLR21, {}},
+ {"trcrsctlr22", TRCRSCTLR22, {}},
+ {"trcrsctlr23", TRCRSCTLR23, {}},
+ {"trcrsctlr24", TRCRSCTLR24, {}},
+ {"trcrsctlr25", TRCRSCTLR25, {}},
+ {"trcrsctlr26", TRCRSCTLR26, {}},
+ {"trcrsctlr27", TRCRSCTLR27, {}},
+ {"trcrsctlr28", TRCRSCTLR28, {}},
+ {"trcrsctlr29", TRCRSCTLR29, {}},
+ {"trcrsctlr30", TRCRSCTLR30, {}},
+ {"trcrsctlr31", TRCRSCTLR31, {}},
+ {"trcssccr0", TRCSSCCR0, {}},
+ {"trcssccr1", TRCSSCCR1, {}},
+ {"trcssccr2", TRCSSCCR2, {}},
+ {"trcssccr3", TRCSSCCR3, {}},
+ {"trcssccr4", TRCSSCCR4, {}},
+ {"trcssccr5", TRCSSCCR5, {}},
+ {"trcssccr6", TRCSSCCR6, {}},
+ {"trcssccr7", TRCSSCCR7, {}},
+ {"trcsscsr0", TRCSSCSR0, {}},
+ {"trcsscsr1", TRCSSCSR1, {}},
+ {"trcsscsr2", TRCSSCSR2, {}},
+ {"trcsscsr3", TRCSSCSR3, {}},
+ {"trcsscsr4", TRCSSCSR4, {}},
+ {"trcsscsr5", TRCSSCSR5, {}},
+ {"trcsscsr6", TRCSSCSR6, {}},
+ {"trcsscsr7", TRCSSCSR7, {}},
+ {"trcsspcicr0", TRCSSPCICR0, {}},
+ {"trcsspcicr1", TRCSSPCICR1, {}},
+ {"trcsspcicr2", TRCSSPCICR2, {}},
+ {"trcsspcicr3", TRCSSPCICR3, {}},
+ {"trcsspcicr4", TRCSSPCICR4, {}},
+ {"trcsspcicr5", TRCSSPCICR5, {}},
+ {"trcsspcicr6", TRCSSPCICR6, {}},
+ {"trcsspcicr7", TRCSSPCICR7, {}},
+ {"trcpdcr", TRCPDCR, {}},
+ {"trcacvr0", TRCACVR0, {}},
+ {"trcacvr1", TRCACVR1, {}},
+ {"trcacvr2", TRCACVR2, {}},
+ {"trcacvr3", TRCACVR3, {}},
+ {"trcacvr4", TRCACVR4, {}},
+ {"trcacvr5", TRCACVR5, {}},
+ {"trcacvr6", TRCACVR6, {}},
+ {"trcacvr7", TRCACVR7, {}},
+ {"trcacvr8", TRCACVR8, {}},
+ {"trcacvr9", TRCACVR9, {}},
+ {"trcacvr10", TRCACVR10, {}},
+ {"trcacvr11", TRCACVR11, {}},
+ {"trcacvr12", TRCACVR12, {}},
+ {"trcacvr13", TRCACVR13, {}},
+ {"trcacvr14", TRCACVR14, {}},
+ {"trcacvr15", TRCACVR15, {}},
+ {"trcacatr0", TRCACATR0, {}},
+ {"trcacatr1", TRCACATR1, {}},
+ {"trcacatr2", TRCACATR2, {}},
+ {"trcacatr3", TRCACATR3, {}},
+ {"trcacatr4", TRCACATR4, {}},
+ {"trcacatr5", TRCACATR5, {}},
+ {"trcacatr6", TRCACATR6, {}},
+ {"trcacatr7", TRCACATR7, {}},
+ {"trcacatr8", TRCACATR8, {}},
+ {"trcacatr9", TRCACATR9, {}},
+ {"trcacatr10", TRCACATR10, {}},
+ {"trcacatr11", TRCACATR11, {}},
+ {"trcacatr12", TRCACATR12, {}},
+ {"trcacatr13", TRCACATR13, {}},
+ {"trcacatr14", TRCACATR14, {}},
+ {"trcacatr15", TRCACATR15, {}},
+ {"trcdvcvr0", TRCDVCVR0, {}},
+ {"trcdvcvr1", TRCDVCVR1, {}},
+ {"trcdvcvr2", TRCDVCVR2, {}},
+ {"trcdvcvr3", TRCDVCVR3, {}},
+ {"trcdvcvr4", TRCDVCVR4, {}},
+ {"trcdvcvr5", TRCDVCVR5, {}},
+ {"trcdvcvr6", TRCDVCVR6, {}},
+ {"trcdvcvr7", TRCDVCVR7, {}},
+ {"trcdvcmr0", TRCDVCMR0, {}},
+ {"trcdvcmr1", TRCDVCMR1, {}},
+ {"trcdvcmr2", TRCDVCMR2, {}},
+ {"trcdvcmr3", TRCDVCMR3, {}},
+ {"trcdvcmr4", TRCDVCMR4, {}},
+ {"trcdvcmr5", TRCDVCMR5, {}},
+ {"trcdvcmr6", TRCDVCMR6, {}},
+ {"trcdvcmr7", TRCDVCMR7, {}},
+ {"trccidcvr0", TRCCIDCVR0, {}},
+ {"trccidcvr1", TRCCIDCVR1, {}},
+ {"trccidcvr2", TRCCIDCVR2, {}},
+ {"trccidcvr3", TRCCIDCVR3, {}},
+ {"trccidcvr4", TRCCIDCVR4, {}},
+ {"trccidcvr5", TRCCIDCVR5, {}},
+ {"trccidcvr6", TRCCIDCVR6, {}},
+ {"trccidcvr7", TRCCIDCVR7, {}},
+ {"trcvmidcvr0", TRCVMIDCVR0, {}},
+ {"trcvmidcvr1", TRCVMIDCVR1, {}},
+ {"trcvmidcvr2", TRCVMIDCVR2, {}},
+ {"trcvmidcvr3", TRCVMIDCVR3, {}},
+ {"trcvmidcvr4", TRCVMIDCVR4, {}},
+ {"trcvmidcvr5", TRCVMIDCVR5, {}},
+ {"trcvmidcvr6", TRCVMIDCVR6, {}},
+ {"trcvmidcvr7", TRCVMIDCVR7, {}},
+ {"trccidcctlr0", TRCCIDCCTLR0, {}},
+ {"trccidcctlr1", TRCCIDCCTLR1, {}},
+ {"trcvmidcctlr0", TRCVMIDCCTLR0, {}},
+ {"trcvmidcctlr1", TRCVMIDCCTLR1, {}},
+ {"trcitctrl", TRCITCTRL, {}},
+ {"trcclaimset", TRCCLAIMSET, {}},
+ {"trcclaimclr", TRCCLAIMCLR, {}},
// GICv3 registers
- {"icc_bpr1_el1", ICC_BPR1_EL1},
- {"icc_bpr0_el1", ICC_BPR0_EL1},
- {"icc_pmr_el1", ICC_PMR_EL1},
- {"icc_ctlr_el1", ICC_CTLR_EL1},
- {"icc_ctlr_el3", ICC_CTLR_EL3},
- {"icc_sre_el1", ICC_SRE_EL1},
- {"icc_sre_el2", ICC_SRE_EL2},
- {"icc_sre_el3", ICC_SRE_EL3},
- {"icc_igrpen0_el1", ICC_IGRPEN0_EL1},
- {"icc_igrpen1_el1", ICC_IGRPEN1_EL1},
- {"icc_igrpen1_el3", ICC_IGRPEN1_EL3},
- {"icc_seien_el1", ICC_SEIEN_EL1},
- {"icc_ap0r0_el1", ICC_AP0R0_EL1},
- {"icc_ap0r1_el1", ICC_AP0R1_EL1},
- {"icc_ap0r2_el1", ICC_AP0R2_EL1},
- {"icc_ap0r3_el1", ICC_AP0R3_EL1},
- {"icc_ap1r0_el1", ICC_AP1R0_EL1},
- {"icc_ap1r1_el1", ICC_AP1R1_EL1},
- {"icc_ap1r2_el1", ICC_AP1R2_EL1},
- {"icc_ap1r3_el1", ICC_AP1R3_EL1},
- {"ich_ap0r0_el2", ICH_AP0R0_EL2},
- {"ich_ap0r1_el2", ICH_AP0R1_EL2},
- {"ich_ap0r2_el2", ICH_AP0R2_EL2},
- {"ich_ap0r3_el2", ICH_AP0R3_EL2},
- {"ich_ap1r0_el2", ICH_AP1R0_EL2},
- {"ich_ap1r1_el2", ICH_AP1R1_EL2},
- {"ich_ap1r2_el2", ICH_AP1R2_EL2},
- {"ich_ap1r3_el2", ICH_AP1R3_EL2},
- {"ich_hcr_el2", ICH_HCR_EL2},
- {"ich_misr_el2", ICH_MISR_EL2},
- {"ich_vmcr_el2", ICH_VMCR_EL2},
- {"ich_vseir_el2", ICH_VSEIR_EL2},
- {"ich_lr0_el2", ICH_LR0_EL2},
- {"ich_lr1_el2", ICH_LR1_EL2},
- {"ich_lr2_el2", ICH_LR2_EL2},
- {"ich_lr3_el2", ICH_LR3_EL2},
- {"ich_lr4_el2", ICH_LR4_EL2},
- {"ich_lr5_el2", ICH_LR5_EL2},
- {"ich_lr6_el2", ICH_LR6_EL2},
- {"ich_lr7_el2", ICH_LR7_EL2},
- {"ich_lr8_el2", ICH_LR8_EL2},
- {"ich_lr9_el2", ICH_LR9_EL2},
- {"ich_lr10_el2", ICH_LR10_EL2},
- {"ich_lr11_el2", ICH_LR11_EL2},
- {"ich_lr12_el2", ICH_LR12_EL2},
- {"ich_lr13_el2", ICH_LR13_EL2},
- {"ich_lr14_el2", ICH_LR14_EL2},
- {"ich_lr15_el2", ICH_LR15_EL2}
-};
-
-const AArch64NamedImmMapper::Mapping
-AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = {
- {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3}
+ {"icc_bpr1_el1", ICC_BPR1_EL1, {}},
+ {"icc_bpr0_el1", ICC_BPR0_EL1, {}},
+ {"icc_pmr_el1", ICC_PMR_EL1, {}},
+ {"icc_ctlr_el1", ICC_CTLR_EL1, {}},
+ {"icc_ctlr_el3", ICC_CTLR_EL3, {}},
+ {"icc_sre_el1", ICC_SRE_EL1, {}},
+ {"icc_sre_el2", ICC_SRE_EL2, {}},
+ {"icc_sre_el3", ICC_SRE_EL3, {}},
+ {"icc_igrpen0_el1", ICC_IGRPEN0_EL1, {}},
+ {"icc_igrpen1_el1", ICC_IGRPEN1_EL1, {}},
+ {"icc_igrpen1_el3", ICC_IGRPEN1_EL3, {}},
+ {"icc_seien_el1", ICC_SEIEN_EL1, {}},
+ {"icc_ap0r0_el1", ICC_AP0R0_EL1, {}},
+ {"icc_ap0r1_el1", ICC_AP0R1_EL1, {}},
+ {"icc_ap0r2_el1", ICC_AP0R2_EL1, {}},
+ {"icc_ap0r3_el1", ICC_AP0R3_EL1, {}},
+ {"icc_ap1r0_el1", ICC_AP1R0_EL1, {}},
+ {"icc_ap1r1_el1", ICC_AP1R1_EL1, {}},
+ {"icc_ap1r2_el1", ICC_AP1R2_EL1, {}},
+ {"icc_ap1r3_el1", ICC_AP1R3_EL1, {}},
+ {"ich_ap0r0_el2", ICH_AP0R0_EL2, {}},
+ {"ich_ap0r1_el2", ICH_AP0R1_EL2, {}},
+ {"ich_ap0r2_el2", ICH_AP0R2_EL2, {}},
+ {"ich_ap0r3_el2", ICH_AP0R3_EL2, {}},
+ {"ich_ap1r0_el2", ICH_AP1R0_EL2, {}},
+ {"ich_ap1r1_el2", ICH_AP1R1_EL2, {}},
+ {"ich_ap1r2_el2", ICH_AP1R2_EL2, {}},
+ {"ich_ap1r3_el2", ICH_AP1R3_EL2, {}},
+ {"ich_hcr_el2", ICH_HCR_EL2, {}},
+ {"ich_misr_el2", ICH_MISR_EL2, {}},
+ {"ich_vmcr_el2", ICH_VMCR_EL2, {}},
+ {"ich_vseir_el2", ICH_VSEIR_EL2, {}},
+ {"ich_lr0_el2", ICH_LR0_EL2, {}},
+ {"ich_lr1_el2", ICH_LR1_EL2, {}},
+ {"ich_lr2_el2", ICH_LR2_EL2, {}},
+ {"ich_lr3_el2", ICH_LR3_EL2, {}},
+ {"ich_lr4_el2", ICH_LR4_EL2, {}},
+ {"ich_lr5_el2", ICH_LR5_EL2, {}},
+ {"ich_lr6_el2", ICH_LR6_EL2, {}},
+ {"ich_lr7_el2", ICH_LR7_EL2, {}},
+ {"ich_lr8_el2", ICH_LR8_EL2, {}},
+ {"ich_lr9_el2", ICH_LR9_EL2, {}},
+ {"ich_lr10_el2", ICH_LR10_EL2, {}},
+ {"ich_lr11_el2", ICH_LR11_EL2, {}},
+ {"ich_lr12_el2", ICH_LR12_EL2, {}},
+ {"ich_lr13_el2", ICH_LR13_EL2, {}},
+ {"ich_lr14_el2", ICH_LR14_EL2, {}},
+ {"ich_lr15_el2", ICH_LR15_EL2, {}},
+
+ // Cyclone registers
+ {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3, {AArch64::ProcCyclone}},
+
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ {"pan", PAN, {AArch64::HasV8_1aOps}},
+
+ // v8.1a "Limited Ordering Regions" extension-specific system registers
+ {"lorsa_el1", LORSA_EL1, {AArch64::HasV8_1aOps}},
+ {"lorea_el1", LOREA_EL1, {AArch64::HasV8_1aOps}},
+ {"lorn_el1", LORN_EL1, {AArch64::HasV8_1aOps}},
+ {"lorc_el1", LORC_EL1, {AArch64::HasV8_1aOps}},
+
+ // v8.1a "Virtualization host extensions" system registers
+ {"ttbr1_el2", TTBR1_EL2, {AArch64::HasV8_1aOps}},
+ {"contextidr_el2", CONTEXTIDR_EL2, {AArch64::HasV8_1aOps}},
+ {"cnthv_tval_el2", CNTHV_TVAL_EL2, {AArch64::HasV8_1aOps}},
+ {"cnthv_cval_el2", CNTHV_CVAL_EL2, {AArch64::HasV8_1aOps}},
+ {"cnthv_ctl_el2", CNTHV_CTL_EL2, {AArch64::HasV8_1aOps}},
+ {"sctlr_el12", SCTLR_EL12, {AArch64::HasV8_1aOps}},
+ {"cpacr_el12", CPACR_EL12, {AArch64::HasV8_1aOps}},
+ {"ttbr0_el12", TTBR0_EL12, {AArch64::HasV8_1aOps}},
+ {"ttbr1_el12", TTBR1_EL12, {AArch64::HasV8_1aOps}},
+ {"tcr_el12", TCR_EL12, {AArch64::HasV8_1aOps}},
+ {"afsr0_el12", AFSR0_EL12, {AArch64::HasV8_1aOps}},
+ {"afsr1_el12", AFSR1_EL12, {AArch64::HasV8_1aOps}},
+ {"esr_el12", ESR_EL12, {AArch64::HasV8_1aOps}},
+ {"far_el12", FAR_EL12, {AArch64::HasV8_1aOps}},
+ {"mair_el12", MAIR_EL12, {AArch64::HasV8_1aOps}},
+ {"amair_el12", AMAIR_EL12, {AArch64::HasV8_1aOps}},
+ {"vbar_el12", VBAR_EL12, {AArch64::HasV8_1aOps}},
+ {"contextidr_el12", CONTEXTIDR_EL12, {AArch64::HasV8_1aOps}},
+ {"cntkctl_el12", CNTKCTL_EL12, {AArch64::HasV8_1aOps}},
+ {"cntp_tval_el02", CNTP_TVAL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntp_ctl_el02", CNTP_CTL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntp_cval_el02", CNTP_CVAL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntv_tval_el02", CNTV_TVAL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntv_ctl_el02", CNTV_CTL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntv_cval_el02", CNTV_CVAL_EL02, {AArch64::HasV8_1aOps}},
+ {"spsr_el12", SPSR_EL12, {AArch64::HasV8_1aOps}},
+ {"elr_el12", ELR_EL12, {AArch64::HasV8_1aOps}},
};
uint32_t
-AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
+AArch64SysReg::SysRegMapper::fromString(StringRef Name,
+ const FeatureBitset& FeatureBits, bool &Valid) const {
std::string NameLower = Name.lower();
// First search the registers shared by all
- for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
- if (SysRegPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
+ if (SysRegMappings[i].isNameEqual(NameLower, FeatureBits)) {
Valid = true;
- return SysRegPairs[i].Value;
- }
- }
-
- // Next search for target specific registers
- if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
- if (CycloneSysRegPairs[i].Name == NameLower) {
- Valid = true;
- return CycloneSysRegPairs[i].Value;
- }
+ return SysRegMappings[i].Value;
}
}
// Now try the instruction-specific registers (either read-only or
// write-only).
- for (unsigned i = 0; i < NumInstPairs; ++i) {
- if (InstPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < NumInstMappings; ++i) {
+ if (InstMappings[i].isNameEqual(NameLower, FeatureBits)) {
Valid = true;
- return InstPairs[i].Value;
+ return InstMappings[i].Value;
}
}
@@ -814,28 +850,20 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
}
std::string
-AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
+AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
+ const FeatureBitset& FeatureBits) const {
// First search the registers shared by all
- for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
- if (SysRegPairs[i].Value == Bits) {
- return SysRegPairs[i].Name;
- }
- }
-
- // Next search for target specific registers
- if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
- if (CycloneSysRegPairs[i].Value == Bits) {
- return CycloneSysRegPairs[i].Name;
- }
+ for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
+ if (SysRegMappings[i].isValueEqual(Bits, FeatureBits)) {
+ return SysRegMappings[i].Name;
}
}
// Now try the instruction-specific registers (either read-only or
// write-only).
- for (unsigned i = 0; i < NumInstPairs; ++i) {
- if (InstPairs[i].Value == Bits) {
- return InstPairs[i].Name;
+ for (unsigned i = 0; i < NumInstMappings; ++i) {
+ if (InstMappings[i].isValueEqual(Bits, FeatureBits)) {
+ return InstMappings[i].Name;
}
}
@@ -850,40 +878,40 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
+ "_c" + utostr(CRm) + "_" + utostr(Op2);
}
-const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = {
- {"ipas2e1is", IPAS2E1IS},
- {"ipas2le1is", IPAS2LE1IS},
- {"vmalle1is", VMALLE1IS},
- {"alle2is", ALLE2IS},
- {"alle3is", ALLE3IS},
- {"vae1is", VAE1IS},
- {"vae2is", VAE2IS},
- {"vae3is", VAE3IS},
- {"aside1is", ASIDE1IS},
- {"vaae1is", VAAE1IS},
- {"alle1is", ALLE1IS},
- {"vale1is", VALE1IS},
- {"vale2is", VALE2IS},
- {"vale3is", VALE3IS},
- {"vmalls12e1is", VMALLS12E1IS},
- {"vaale1is", VAALE1IS},
- {"ipas2e1", IPAS2E1},
- {"ipas2le1", IPAS2LE1},
- {"vmalle1", VMALLE1},
- {"alle2", ALLE2},
- {"alle3", ALLE3},
- {"vae1", VAE1},
- {"vae2", VAE2},
- {"vae3", VAE3},
- {"aside1", ASIDE1},
- {"vaae1", VAAE1},
- {"alle1", ALLE1},
- {"vale1", VALE1},
- {"vale2", VALE2},
- {"vale3", VALE3},
- {"vmalls12e1", VMALLS12E1},
- {"vaale1", VAALE1}
+const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = { // name/value table handed to the TLBIMapper constructor; every entry's feature set is empty ({}), so no entry is feature-gated
+ {"ipas2e1is", IPAS2E1IS, {}},
+ {"ipas2le1is", IPAS2LE1IS, {}},
+ {"vmalle1is", VMALLE1IS, {}},
+ {"alle2is", ALLE2IS, {}},
+ {"alle3is", ALLE3IS, {}},
+ {"vae1is", VAE1IS, {}},
+ {"vae2is", VAE2IS, {}},
+ {"vae3is", VAE3IS, {}},
+ {"aside1is", ASIDE1IS, {}},
+ {"vaae1is", VAAE1IS, {}},
+ {"alle1is", ALLE1IS, {}},
+ {"vale1is", VALE1IS, {}},
+ {"vale2is", VALE2IS, {}},
+ {"vale3is", VALE3IS, {}},
+ {"vmalls12e1is", VMALLS12E1IS, {}},
+ {"vaale1is", VAALE1IS, {}},
+ {"ipas2e1", IPAS2E1, {}},
+ {"ipas2le1", IPAS2LE1, {}},
+ {"vmalle1", VMALLE1, {}},
+ {"alle2", ALLE2, {}},
+ {"alle3", ALLE3, {}},
+ {"vae1", VAE1, {}},
+ {"vae2", VAE2, {}},
+ {"vae3", VAE3, {}},
+ {"aside1", ASIDE1, {}},
+ {"vaae1", VAAE1, {}},
+ {"alle1", ALLE1, {}},
+ {"vale1", VALE1, {}},
+ {"vale2", VALE2, {}},
+ {"vale3", VALE3, {}},
+ {"vmalls12e1", VMALLS12E1, {}},
+ {"vaale1", VAALE1, {}}
};
AArch64TLBI::TLBIMapper::TLBIMapper()
- : AArch64NamedImmMapper(TLBIPairs, 0) {}
+ : AArch64NamedImmMapper(TLBIMappings, 0) {} // second argument is TooBigImm; 0 means no plain-immediate syntax form is allowed
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 6d0337c..7125f14 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -22,6 +22,7 @@
#include "MCTargetDesc/AArch64MCTargetDesc.h" // For AArch64::X0 and friends.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -280,22 +281,45 @@ struct AArch64NamedImmMapper {
struct Mapping {
const char *Name;
uint32_t Value;
+ // Set of features this mapping is available for
+ // Zero value of FeatureBitSet means the mapping is always available
+ FeatureBitset FeatureBitSet;
+
+ bool isNameEqual(std::string Other,
+ const FeatureBitset& FeatureBits) const {
+ if (FeatureBitSet.any() &&
+ (FeatureBitSet & FeatureBits).none())
+ return false;
+ return Name == Other;
+ }
+
+ bool isValueEqual(uint32_t Other,
+ const FeatureBitset& FeatureBits) const {
+ if (FeatureBitSet.any() &&
+ (FeatureBitSet & FeatureBits).none())
+ return false;
+ return Value == Other;
+ }
};
template<int N>
- AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
- : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
+ AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm)
+ : Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {}
- StringRef toString(uint32_t Value, bool &Valid) const;
- uint32_t fromString(StringRef Name, bool &Valid) const;
+ // Maps value to string, depending on availability for FeatureBits given
+ StringRef toString(uint32_t Value, const FeatureBitset& FeatureBits,
+ bool &Valid) const;
+ // Maps string to value, depending on availability for FeatureBits given
+ uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
+ bool &Valid) const;
/// Many of the instructions allow an alternative assembly form consisting of
/// a simple immediate. Currently the only valid forms are ranges [0, N) where
/// N being 0 indicates no immediate syntax-form is allowed.
bool validImm(uint32_t Value) const;
protected:
- const Mapping *Pairs;
- size_t NumPairs;
+ const Mapping *Mappings;
+ size_t NumMappings;
uint32_t TooBigImm;
};
@@ -317,7 +341,7 @@ namespace AArch64AT {
};
struct ATMapper : AArch64NamedImmMapper {
- const static Mapping ATPairs[];
+ const static Mapping ATMappings[];
ATMapper();
};
@@ -341,7 +365,7 @@ namespace AArch64DB {
};
struct DBarrierMapper : AArch64NamedImmMapper {
- const static Mapping DBarrierPairs[];
+ const static Mapping DBarrierMappings[];
DBarrierMapper();
};
@@ -361,7 +385,7 @@ namespace AArch64DC {
};
struct DCMapper : AArch64NamedImmMapper {
- const static Mapping DCPairs[];
+ const static Mapping DCMappings[];
DCMapper();
};
@@ -378,7 +402,7 @@ namespace AArch64IC {
struct ICMapper : AArch64NamedImmMapper {
- const static Mapping ICPairs[];
+ const static Mapping ICMappings[];
ICMapper();
};
@@ -394,7 +418,7 @@ namespace AArch64ISB {
SY = 0xf
};
struct ISBMapper : AArch64NamedImmMapper {
- const static Mapping ISBPairs[];
+ const static Mapping ISBMappings[];
ISBMapper();
};
@@ -424,7 +448,7 @@ namespace AArch64PRFM {
};
struct PRFMMapper : AArch64NamedImmMapper {
- const static Mapping PRFMPairs[];
+ const static Mapping PRFMMappings[];
PRFMMapper();
};
@@ -435,11 +459,14 @@ namespace AArch64PState {
Invalid = -1,
SPSel = 0x05,
DAIFSet = 0x1e,
- DAIFClr = 0x1f
+ DAIFClr = 0x1f,
+
+ // v8.1a "Privileged Access Never" extension-specific PStates
+ PAN = 0x04,
};
struct PStateMapper : AArch64NamedImmMapper {
- const static Mapping PStatePairs[];
+ const static Mapping PStateMappings[];
PStateMapper();
};
@@ -1122,11 +1149,48 @@ namespace AArch64SysReg {
ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101
ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110
ICH_LR15_EL2 = 0xe66f, // 11 100 1100 1101 111
- };
- // Cyclone specific system registers
- enum CycloneSysRegValues {
- CPM_IOACC_CTL_EL3 = 0xff90
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ PAN = 0xc213, // 11 000 0100 0010 011
+
+ // v8.1a "Limited Ordering Regions" extension-specific system registers
+ LORSA_EL1 = 0xc520, // 11 000 1010 0100 000
+ LOREA_EL1 = 0xc521, // 11 000 1010 0100 001
+ LORN_EL1 = 0xc522, // 11 000 1010 0100 010
+ LORC_EL1 = 0xc523, // 11 000 1010 0100 011
+ LORID_EL1 = 0xc527, // 11 000 1010 0100 111
+
+ // v8.1a "Virtualization host extensions" system registers
+ TTBR1_EL2 = 0xe101, // 11 100 0010 0000 001
+ CONTEXTIDR_EL2 = 0xe681, // 11 100 1101 0000 001
+ CNTHV_TVAL_EL2 = 0xe718, // 11 100 1110 0011 000
+ CNTHV_CVAL_EL2 = 0xe71a, // 11 100 1110 0011 010
+ CNTHV_CTL_EL2 = 0xe719, // 11 100 1110 0011 001
+ SCTLR_EL12 = 0xe880, // 11 101 0001 0000 000
+ CPACR_EL12 = 0xe882, // 11 101 0001 0000 010
+ TTBR0_EL12 = 0xe900, // 11 101 0010 0000 000
+ TTBR1_EL12 = 0xe901, // 11 101 0010 0000 001
+ TCR_EL12 = 0xe902, // 11 101 0010 0000 010
+ AFSR0_EL12 = 0xea88, // 11 101 0101 0001 000
+ AFSR1_EL12 = 0xea89, // 11 101 0101 0001 001
+ ESR_EL12 = 0xea90, // 11 101 0101 0010 000
+ FAR_EL12 = 0xeb00, // 11 101 0110 0000 000
+ MAIR_EL12 = 0xed10, // 11 101 1010 0010 000
+ AMAIR_EL12 = 0xed18, // 11 101 1010 0011 000
+ VBAR_EL12 = 0xee00, // 11 101 1100 0000 000
+ CONTEXTIDR_EL12 = 0xee81, // 11 101 1101 0000 001
+ CNTKCTL_EL12 = 0xef08, // 11 101 1110 0001 000
+ CNTP_TVAL_EL02 = 0xef10, // 11 101 1110 0010 000
+ CNTP_CTL_EL02 = 0xef11, // 11 101 1110 0010 001
+ CNTP_CVAL_EL02 = 0xef12, // 11 101 1110 0010 010
+ CNTV_TVAL_EL02 = 0xef18, // 11 101 1110 0011 000
+ CNTV_CTL_EL02 = 0xef19, // 11 101 1110 0011 001
+ CNTV_CVAL_EL02 = 0xef1a, // 11 101 1110 0011 010
+ SPSR_EL12 = 0xea00, // 11 101 0100 0000 000
+ ELR_EL12 = 0xea01, // 11 101 0100 0000 001
+
+ // Cyclone specific system registers
+ CPM_IOACC_CTL_EL3 = 0xff90,
};
// Note that these do not inherit from AArch64NamedImmMapper. This class is
@@ -1134,26 +1198,25 @@ namespace AArch64SysReg {
// burdening the common AArch64NamedImmMapper with abstractions only needed in
// this one case.
struct SysRegMapper {
- static const AArch64NamedImmMapper::Mapping SysRegPairs[];
- static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[];
+ static const AArch64NamedImmMapper::Mapping SysRegMappings[];
- const AArch64NamedImmMapper::Mapping *InstPairs;
- size_t NumInstPairs;
- uint64_t FeatureBits;
+ const AArch64NamedImmMapper::Mapping *InstMappings;
+ size_t NumInstMappings;
- SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { }
- uint32_t fromString(StringRef Name, bool &Valid) const;
- std::string toString(uint32_t Bits) const;
+ SysRegMapper() { }
+ uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
+ bool &Valid) const;
+ std::string toString(uint32_t Bits, const FeatureBitset& FeatureBits) const;
};
struct MSRMapper : SysRegMapper {
- static const AArch64NamedImmMapper::Mapping MSRPairs[];
- MSRMapper(uint64_t FeatureBits);
+ static const AArch64NamedImmMapper::Mapping MSRMappings[];
+ MSRMapper();
};
struct MRSMapper : SysRegMapper {
- static const AArch64NamedImmMapper::Mapping MRSPairs[];
- MRSMapper(uint64_t FeatureBits);
+ static const AArch64NamedImmMapper::Mapping MRSMappings[];
+ MRSMapper();
};
uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
@@ -1197,7 +1260,7 @@ namespace AArch64TLBI {
};
struct TLBIMapper : AArch64NamedImmMapper {
- const static Mapping TLBIPairs[];
+ const static Mapping TLBIMappings[];
TLBIMapper();
};
OpenPOWER on IntegriCloud