author    | dim <dim@FreeBSD.org> | 2015-05-27 18:44:32 +0000
committer | dim <dim@FreeBSD.org> | 2015-05-27 18:44:32 +0000
commit    | 782067d0278612ee75d024b9b135c221c327e9e8 (patch)
tree      | a6140557876943cdd800ee997c9317283394b22c /lib/Target/AArch64
parent    | 6669eceb008a9f13853b330dc0b099d6386fe879 (diff)
Vendor import of llvm trunk r238337:
https://llvm.org/svn/llvm-project/llvm/trunk@238337
Diffstat (limited to 'lib/Target/AArch64')
64 files changed, 4152 insertions, 2822 deletions
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index e96d18b..21106c9 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -40,9 +40,6 @@ FunctionPass *createAArch64ConditionOptimizerPass();
 FunctionPass *createAArch64AddressTypePromotionPass();
 FunctionPass *createAArch64A57FPLoadBalancing();
 FunctionPass *createAArch64A53Fix835769();
-/// \brief Creates an ARM-specific Target Transformation Info pass.
-ImmutablePass *
-createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM);
 FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index e6a27c3..9a7d6c8 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -41,6 +41,13 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
                                         "Has zero-cycle zeroing instructions">;

 //===----------------------------------------------------------------------===//
+// Architectures.
+//
+
+def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
+  "Support ARM v8.1a instructions", [FeatureCRC]>;
+
+//===----------------------------------------------------------------------===//
 // Register File Description
 //===----------------------------------------------------------------------===//

@@ -91,6 +98,8 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
 def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
 def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
+// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
+def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>;
 def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;

 //===----------------------------------------------------------------------===//
@@ -114,12 +123,14 @@ def AppleAsmParserVariant : AsmParserVariant {
 // AsmWriter bits get associated with the correct class.
 def GenericAsmWriter : AsmWriter {
   string AsmWriterClassName = "InstPrinter";
+  int PassSubtarget = 1;
   int Variant = 0;
   bit isMCAsmWriter = 1;
 }

 def AppleAsmWriter : AsmWriter {
   let AsmWriterClassName = "AppleInstPrinter";
+  int PassSubtarget = 1;
   int Variant = 1;
   int isMCAsmWriter = 1;
 }
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
index 852a635..d7ef3f4 100644
--- a/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -16,8 +16,6 @@
 //===----------------------------------------------------------------------===//

 #include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64Subtarget.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -26,6 +24,8 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"

 using namespace llvm;

@@ -48,7 +48,7 @@ static bool isFirstInstructionInSequence(MachineInstr *MI) {
   case AArch64::PRFUMi:
     return true;
   default:
-    return (MI->mayLoad() || MI->mayStore());
+    return MI->mayLoadOrStore();
   }
 }

@@ -79,7 +79,7 @@ static bool isSecondInstructionInSequence(MachineInstr *MI) {

 namespace {
 class AArch64A53Fix835769 : public MachineFunctionPass {
-  const AArch64InstrInfo *TII;
+  const TargetInstrInfo *TII;

 public:
   static char ID;
@@ -107,17 +107,13 @@ char AArch64A53Fix835769::ID = 0;

 bool
 AArch64A53Fix835769::runOnMachineFunction(MachineFunction &F) {
-  const TargetMachine &TM = F.getTarget();
-
-  bool Changed = false;
   DEBUG(dbgs() << "***** AArch64A53Fix835769 *****\n");
-
-  TII = TM.getSubtarget<AArch64Subtarget>().getInstrInfo();
+  bool Changed = false;
+  TII = F.getSubtarget().getInstrInfo();

   for (auto &MBB : F) {
     Changed |= runOnBasicBlock(MBB);
   }
-
   return Changed;
 }
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index dd1a1ea..bffd9e6 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -96,6 +96,10 @@ static bool isMla(MachineInstr *MI) {
   }
 }

+namespace llvm {
+static void initializeAArch64A57FPLoadBalancingPass(PassRegistry &);
+}
+
 //===----------------------------------------------------------------------===//

 namespace {
@@ -109,14 +113,15 @@ static const char *ColorNames[2] = { "Even", "Odd" };
 class Chain;

 class AArch64A57FPLoadBalancing : public MachineFunctionPass {
-  const AArch64InstrInfo *TII;
   MachineRegisterInfo *MRI;
   const TargetRegisterInfo *TRI;
   RegisterClassInfo RCI;

 public:
   static char ID;
-  explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {}
+  explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {
+    initializeAArch64A57FPLoadBalancingPass(*PassRegistry::getPassRegistry());
+  }

   bool runOnMachineFunction(MachineFunction &F) override;

@@ -137,14 +142,22 @@ private:
   int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
   void scanInstruction(MachineInstr *MI, unsigned Idx,
                        std::map<unsigned, Chain*> &Active,
-                       std::set<std::unique_ptr<Chain>> &AllChains);
+                       std::vector<std::unique_ptr<Chain>> &AllChains);
   void maybeKillChain(MachineOperand &MO, unsigned Idx,
                       std::map<unsigned, Chain*> &RegChains);
   Color getColor(unsigned Register);
   Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
 };
+}
+
 char AArch64A57FPLoadBalancing::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancing, DEBUG_TYPE,
+                      "AArch64 A57 FP Load-Balancing", false, false)
+INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
+                    "AArch64 A57 FP Load-Balancing", false, false)
+
+namespace {
 /// A Chain is a sequence of instructions that are linked together by
 /// an accumulation operand. For example:
 ///
@@ -259,7 +272,7 @@ public:
   }

   /// Return true if this chain starts before Other.
-  bool startsBefore(Chain *Other) {
+  bool startsBefore(const Chain *Other) const {
     return StartInstIdx < Other->StartInstIdx;
   }

@@ -274,12 +287,12 @@ public:
     raw_string_ostream OS(S);
     OS << "{";
-    StartInst->print(OS, NULL, true);
+    StartInst->print(OS, /* SkipOpers= */true);
     OS << " -> ";
-    LastInst->print(OS, NULL, true);
+    LastInst->print(OS, /* SkipOpers= */true);
     if (KillInst) {
       OS << " (kill @ ";
-      KillInst->print(OS, NULL, true);
+      KillInst->print(OS, /* SkipOpers= */true);
       OS << ")";
     }
     OS << "}";
@@ -294,13 +307,16 @@ public:
 //===----------------------------------------------------------------------===//

 bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
+  // Don't do anything if this isn't an A53 or A57.
+  if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||
+        F.getSubtarget<AArch64Subtarget>().isCortexA57()))
+    return false;
+
   bool Changed = false;
   DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");

-  const TargetMachine &TM = F.getTarget();
   MRI = &F.getRegInfo();
   TRI = F.getRegInfo().getTargetRegisterInfo();
-  TII = TM.getSubtarget<AArch64Subtarget>().getInstrInfo();
   RCI.runOnMachineFunction(F);

   for (auto &MBB : F) {
@@ -320,7 +336,7 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
   // been killed yet. This is keyed by register - all chains can only have one
   // "link" register between each inst in the chain.
   std::map<unsigned, Chain*> ActiveChains;
-  std::set<std::unique_ptr<Chain>> AllChains;
+  std::vector<std::unique_ptr<Chain>> AllChains;
   unsigned Idx = 0;
   for (auto &MI : MBB)
     scanInstruction(&MI, Idx++, ActiveChains, AllChains);
@@ -431,10 +447,17 @@ bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
   // chains that we cannot change before we look at those we can,
   // so the parity counter is updated and we know what color we should
   // change them to!
+  // Final tie-break with instruction order so pass output is stable (i.e. not
+  // dependent on malloc'd pointer values).
   std::sort(GV.begin(), GV.end(), [](const Chain *G1, const Chain *G2) {
     if (G1->size() != G2->size())
       return G1->size() > G2->size();
-    return G1->requiresFixup() > G2->requiresFixup();
+    if (G1->requiresFixup() != G2->requiresFixup())
+      return G1->requiresFixup() > G2->requiresFixup();
+    // Make sure startsBefore() produces a stable final order.
+    assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
+           "Starts before not total order!");
+    return G1->startsBefore(G2);
   });

   Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
@@ -580,10 +603,9 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
   return Changed;
 }

-void AArch64A57FPLoadBalancing::
-scanInstruction(MachineInstr *MI, unsigned Idx,
-                std::map<unsigned, Chain*> &ActiveChains,
-                std::set<std::unique_ptr<Chain>> &AllChains) {
+void AArch64A57FPLoadBalancing::scanInstruction(
+    MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
+    std::vector<std::unique_ptr<Chain>> &AllChains) {
   // Inspect "MI", updating ActiveChains and AllChains.
   if (isMul(MI)) {
@@ -602,7 +624,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,

     auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
     ActiveChains[DestReg] = G.get();
-    AllChains.insert(std::move(G));
+    AllChains.push_back(std::move(G));

   } else if (isMla(MI)) {

@@ -646,7 +668,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
                    << TRI->getName(DestReg) << "\n");
     auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
     ActiveChains[DestReg] = G.get();
-    AllChains.insert(std::move(G));
+    AllChains.push_back(std::move(G));

   } else {

diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 287989f..716e1a3 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -41,6 +41,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"

 using namespace llvm;

diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 5afe0f4..18d21fd 100644
--- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -64,7 +64,7 @@ STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
 namespace {
 class AArch64AdvSIMDScalar : public MachineFunctionPass {
   MachineRegisterInfo *MRI;
-  const AArch64InstrInfo *TII;
+  const TargetInstrInfo *TII;

 private:
   // isProfitableToTransform - Predicate function to determine whether an
@@ -158,7 +158,7 @@ static unsigned getSrcFromCopy(const MachineInstr *MI,
 // getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent
 // that we're considering transforming to, return that AdvSIMD opcode. For all
 // others, return the original opcode.
-static int getTransformOpcode(unsigned Opc) {
+static unsigned getTransformOpcode(unsigned Opc) {
   switch (Opc) {
   default:
     break;
@@ -179,7 +179,7 @@ static int getTransformOpcode(unsigned Opc) {
 }

 static bool isTransformable(const MachineInstr *MI) {
-  int Opc = MI->getOpcode();
+  unsigned Opc = MI->getOpcode();
   return Opc != getTransformOpcode(Opc);
 }

@@ -268,7 +268,7 @@ AArch64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
   return TransformAll;
 }

-static MachineInstr *insertCopy(const AArch64InstrInfo *TII, MachineInstr *MI,
+static MachineInstr *insertCopy(const TargetInstrInfo *TII, MachineInstr *MI,
                                 unsigned Dst, unsigned Src, bool IsKill) {
   MachineInstrBuilder MIB =
       BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AArch64::COPY),
@@ -286,8 +286,8 @@ void AArch64AdvSIMDScalar::transformInstruction(MachineInstr *MI) {
   DEBUG(dbgs() << "Scalar transform: " << *MI);

   MachineBasicBlock *MBB = MI->getParent();
-  int OldOpc = MI->getOpcode();
-  int NewOpc = getTransformOpcode(OldOpc);
+  unsigned OldOpc = MI->getOpcode();
+  unsigned NewOpc = getTransformOpcode(OldOpc);
   assert(OldOpc != NewOpc && "transform an instruction to itself?!");

   // Check if we need a copy for the source registers.
@@ -376,10 +376,8 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
   bool Changed = false;
   DEBUG(dbgs() << "***** AArch64AdvSIMDScalar *****\n");

-  const TargetMachine &TM = mf.getTarget();
   MRI = &mf.getRegInfo();
-  TII = static_cast<const AArch64InstrInfo *>(
-      TM.getSubtargetImpl()->getInstrInfo());
+  TII = mf.getSubtarget().getInstrInfo();

   // Just check things on a one-block-at-a-time basis.
   for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 5159dbf..a0a09e4 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -13,13 +13,13 @@
 //===----------------------------------------------------------------------===//

 #include "MCTargetDesc/AArch64AddressingModes.h"
-#include "MCTargetDesc/AArch64MCExpr.h"
 #include "AArch64.h"
 #include "AArch64MCInstLower.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
 #include "InstPrinter/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
@@ -36,8 +36,10 @@
 #include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCLinkerOptimizationHint.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;

 #define DEBUG_TYPE "asm-printer"
@@ -45,19 +47,13 @@ using namespace llvm;
 namespace {

 class AArch64AsmPrinter : public AsmPrinter {
-  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
-  /// make the right decision when printing asm code for different targets.
-  const AArch64Subtarget *Subtarget;
-
   AArch64MCInstLower MCInstLowering;
   StackMaps SM;

 public:
-  AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer),
-        Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
-        MCInstLowering(OutContext, *this), SM(*this), AArch64FI(nullptr),
-        LOHLabelCounter(0) {}
+  AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+      : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
+        SM(*this), AArch64FI(nullptr) {}

   const char *getPassName() const override {
     return "AArch64 Assembly Printer";
@@ -118,7 +114,6 @@ private:
   typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;

   MInstToMCSymbol LOHInstToLabel;
-  unsigned LOHLabelCounter;
 };

 } // end of anonymous namespace
@@ -126,38 +121,16 @@ private:
 //===----------------------------------------------------------------------===//

 void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
-  if (Subtarget->isTargetMachO()) {
+  Triple TT(TM.getTargetTriple());
+  if (TT.isOSBinFormatMachO()) {
     // Funny Darwin hack: This flag tells the linker that no global symbols
     // contain code that falls through to other global symbols (e.g. the obvious
     // implementation of multiple entry points). If this doesn't occur, the
     // linker can safely perform dead code stripping. Since LLVM never
     // generates code that does this, it is always safe to set.
-    OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+    OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
     SM.serializeToStackMapSection();
   }
-
-  // Emit a .data.rel section containing any stubs that were created.
-  if (Subtarget->isTargetELF()) {
-    const TargetLoweringObjectFileELF &TLOFELF =
-        static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
-    MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
-    // Output stubs for external and common global variables.
-    MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
-    if (!Stubs.empty()) {
-      OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
-      const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
-
-      for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
-        OutStreamer.EmitLabel(Stubs[i].first);
-        OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
-                                    TD->getPointerSize(0));
-      }
-      Stubs.clear();
-    }
-  }
-
 }

 MachineLocation
@@ -183,7 +156,7 @@ void AArch64AsmPrinter::EmitLOHs() {
              "Label hasn't been inserted for LOH related instruction");
       MCArgs.push_back(LabelIt->second);
     }
-    OutStreamer.EmitLOHDirective(D.getKind(), MCArgs);
+    OutStreamer->EmitLOHDirective(D.getKind(), MCArgs);
     MCArgs.clear();
   }
 }
@@ -199,11 +172,11 @@ MCSymbol *AArch64AsmPrinter::GetCPISymbol(unsigned CPID) const {
   // avoid addends on the relocation?), ELF has no such concept and
   // uses a normal private symbol.

   if (getDataLayout().getLinkerPrivateGlobalPrefix()[0])
-    return OutContext.GetOrCreateSymbol(
+    return OutContext.getOrCreateSymbol(
         Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" +
         Twine(getFunctionNumber()) + "_" + Twine(CPID));

-  return OutContext.GetOrCreateSymbol(
+  return OutContext.getOrCreateSymbol(
       Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" +
       Twine(getFunctionNumber()) + "_" + Twine(CPID));
 }
@@ -226,6 +199,17 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
       O << '#' << Imm;
     break;
   }
+  case MachineOperand::MO_GlobalAddress: {
+    const GlobalValue *GV = MO.getGlobal();
+    MCSymbol *Sym = getSymbol(GV);
+
+    // FIXME: Can we get anything other than a plain symbol here?
+    assert(!MO.getTargetFlags() && "Unknown operand target flag!");
+
+    O << *Sym;
+    printOffset(MO.getOffset(), O);
+    break;
+  }
   }
 }

@@ -254,8 +238,8 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
                                            const TargetRegisterClass *RC,
                                            bool isVector, raw_ostream &O) {
   assert(MO.isReg() && "Should only get here with a register!");
-  const AArch64RegisterInfo *RI = static_cast<const AArch64RegisterInfo *>(
-      TM.getSubtargetImpl()->getRegisterInfo());
+  const AArch64RegisterInfo *RI =
+      MF->getSubtarget<AArch64Subtarget>().getRegisterInfo();
   unsigned Reg = MO.getReg();
   unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
   assert(RI->regsOverlap(RegToPrint, Reg));
@@ -364,8 +348,8 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
   assert(NOps == 4);
   OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
   // cast away const; DIetc do not take const operands for some reason.
-  DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata()));
-  OS << V.getName();
+  OS << cast<DILocalVariable>(MI->getOperand(NOps - 2).getMetadata())
+            ->getName();
   OS << " <- ";
   // Frame address. Currently handles register +- offset only.
   assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
@@ -452,15 +436,15 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,

 void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   // Do any auto-generated pseudo lowerings.
-  if (emitPseudoExpansionLowering(OutStreamer, MI))
+  if (emitPseudoExpansionLowering(*OutStreamer, MI))
     return;

   if (AArch64FI->getLOHRelated().count(MI)) {
     // Generate a label for LOH related instruction
-    MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
+    MCSymbol *LOHLabel = createTempSymbol("loh");
     // Associate the instruction with the label
     LOHInstToLabel[MI] = LOHLabel;
-    OutStreamer.EmitLabel(LOHLabel);
+    OutStreamer->EmitLabel(LOHLabel);
   }

   // Do any manual lowerings.
@@ -468,11 +452,11 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   default:
     break;
   case AArch64::DBG_VALUE: {
-    if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+    if (isVerbose() && OutStreamer->hasRawTextSupport()) {
       SmallString<128> TmpStr;
       raw_svector_ostream OS(TmpStr);
       PrintDebugValueComment(MI, OS);
-      OutStreamer.EmitRawText(StringRef(OS.str()));
+      OutStreamer->EmitRawText(StringRef(OS.str()));
     }
     return;
   }
@@ -483,8 +467,8 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case AArch64::TCRETURNri: {
     MCInst TmpInst;
     TmpInst.setOpcode(AArch64::BR);
-    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
-    EmitToStreamer(OutStreamer, TmpInst);
+    TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+    EmitToStreamer(*OutStreamer, TmpInst);
     return;
   }
   case AArch64::TCRETURNdi: {
@@ -493,7 +477,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     MCInst TmpInst;
     TmpInst.setOpcode(AArch64::B);
     TmpInst.addOperand(Dest);
-    EmitToStreamer(OutStreamer, TmpInst);
+    EmitToStreamer(*OutStreamer, TmpInst);
     return;
   }
   case AArch64::TLSDESC_CALLSEQ: {
@@ -516,52 +500,52 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {

     MCInst Adrp;
     Adrp.setOpcode(AArch64::ADRP);
-    Adrp.addOperand(MCOperand::CreateReg(AArch64::X0));
+    Adrp.addOperand(MCOperand::createReg(AArch64::X0));
     Adrp.addOperand(SymTLSDesc);
-    EmitToStreamer(OutStreamer, Adrp);
+    EmitToStreamer(*OutStreamer, Adrp);

     MCInst Ldr;
     Ldr.setOpcode(AArch64::LDRXui);
-    Ldr.addOperand(MCOperand::CreateReg(AArch64::X1));
-    Ldr.addOperand(MCOperand::CreateReg(AArch64::X0));
+    Ldr.addOperand(MCOperand::createReg(AArch64::X1));
+    Ldr.addOperand(MCOperand::createReg(AArch64::X0));
     Ldr.addOperand(SymTLSDescLo12);
-    Ldr.addOperand(MCOperand::CreateImm(0));
-    EmitToStreamer(OutStreamer, Ldr);
+    Ldr.addOperand(MCOperand::createImm(0));
+    EmitToStreamer(*OutStreamer, Ldr);

     MCInst Add;
     Add.setOpcode(AArch64::ADDXri);
-    Add.addOperand(MCOperand::CreateReg(AArch64::X0));
-    Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+    Add.addOperand(MCOperand::createReg(AArch64::X0));
+    Add.addOperand(MCOperand::createReg(AArch64::X0));
     Add.addOperand(SymTLSDescLo12);
-    Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0)));
-    EmitToStreamer(OutStreamer, Add);
+    Add.addOperand(MCOperand::createImm(AArch64_AM::getShiftValue(0)));
+    EmitToStreamer(*OutStreamer, Add);

     // Emit a relocation-annotation. This expands to no code, but requests
     // the following instruction gets an R_AARCH64_TLSDESC_CALL.
     MCInst TLSDescCall;
     TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
     TLSDescCall.addOperand(Sym);
-    EmitToStreamer(OutStreamer, TLSDescCall);
+    EmitToStreamer(*OutStreamer, TLSDescCall);

     MCInst Blr;
     Blr.setOpcode(AArch64::BLR);
-    Blr.addOperand(MCOperand::CreateReg(AArch64::X1));
-    EmitToStreamer(OutStreamer, Blr);
+    Blr.addOperand(MCOperand::createReg(AArch64::X1));
+    EmitToStreamer(*OutStreamer, Blr);

     return;
   }

   case TargetOpcode::STACKMAP:
-    return LowerSTACKMAP(OutStreamer, SM, *MI);
+    return LowerSTACKMAP(*OutStreamer, SM, *MI);

   case TargetOpcode::PATCHPOINT:
-    return LowerPATCHPOINT(OutStreamer, SM, *MI);
+    return LowerPATCHPOINT(*OutStreamer, SM, *MI);
   }

   // Finally, do the automated lowerings for everything else.
   MCInst TmpInst;
   MCInstLowering.Lower(MI, TmpInst);
-  EmitToStreamer(OutStreamer, TmpInst);
+  EmitToStreamer(*OutStreamer, TmpInst);
 }

 // Force static initialization.
diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index e2b6367..d973234 100644
--- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -476,9 +476,7 @@ bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {

   DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n");

-  TII = (const AArch64InstrInfo *)MF->getTarget()
-            .getSubtargetImpl()
-            ->getInstrInfo();
+  TII = (const AArch64InstrInfo *)MF->getSubtarget().getInstrInfo();

   // Renumber all of the machine basic blocks in the function, guaranteeing that
   // the numbers agree with the position of the block in the function.
diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h
index baf80bc..1e2d1c3 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/lib/Target/AArch64/AArch64CallingConvention.h
@@ -46,7 +46,7 @@ static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
                              CCState &State, unsigned SlotAlign) {
   unsigned Size = LocVT.getSizeInBits() / 8;
   unsigned StackAlign = State.getMachineFunction()
-                            .getSubtarget()
+                            .getTarget()
                             .getDataLayout()
                             ->getStackAlignment();
   unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index a391e76..4691e94 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -16,7 +16,7 @@ class CCIfAlign<string Align, CCAction A> :
   CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;

 /// CCIfBigEndian - Match only if we're in big endian mode.
 class CCIfBigEndian<CCAction A> :
-  CCIf<"State.getMachineFunction().getSubtarget().getDataLayout()->isBigEndian()", A>;
+  CCIf<"State.getMachineFunction().getTarget().getDataLayout()->isBigEndian()", A>;

 //===----------------------------------------------------------------------===//
 // ARM AAPCS64 Calling Convention
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index ba4fc3b..06ff9af 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -92,9 +92,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
   MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I,
                                        unsigned TLSBaseAddrReg) {
     MachineFunction *MF = I->getParent()->getParent();
-    const AArch64TargetMachine *TM =
-        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
-    const AArch64InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

     // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
     // code sequence assumes the address will be.
@@ -112,9 +110,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
   // inserting a copy instruction after I. Returns the new instruction.
   MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
     MachineFunction *MF = I->getParent()->getParent();
-    const AArch64TargetMachine *TM =
-        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
-    const AArch64InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

     // Create a virtual register for the TLS base address.
     MachineRegisterInfo &RegInfo = MF->getRegInfo();
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 87b545b..efdb2e3 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -279,18 +279,16 @@ static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
 /// definition. It also consider definitions of ADRP instructions as uses and
 /// ignore other uses. The ADRPMode is used to collect the information for LHO
 /// that involve ADRP operation only.
-static void initReachingDef(MachineFunction &MF,
+static void initReachingDef(const MachineFunction &MF,
                             InstrToInstrs *ColorOpToReachedUses,
                             BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
                             BlockToSetOfInstrsPerColor &ReachableUses,
                             const MapRegToId &RegToId,
                             const MachineInstr *DummyOp, bool ADRPMode) {
-  const TargetMachine &TM = MF.getTarget();
-  const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
-
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   unsigned NbReg = RegToId.size();

-  for (MachineBasicBlock &MBB : MF) {
+  for (const MachineBasicBlock &MBB : MF) {
     auto &BBGen = Gen[&MBB];
     BBGen = make_unique<const MachineInstr *[]>(NbReg);
     std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
@@ -330,7 +328,7 @@ static void initReachingDef(MachineFunction &MF,
           const uint32_t *PreservedRegs = MO.getRegMask();

           // Set generated regs.
-          for (const auto Entry : RegToId) {
+          for (const auto &Entry : RegToId) {
             unsigned Reg = Entry.second;
             // Use the global register ID when querying APIs external to this
             // pass.
@@ -384,7 +382,7 @@ static void initReachingDef(MachineFunction &MF,
 ///                         op.reachedUses
 ///
 /// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-static void reachingDefAlgorithm(MachineFunction &MF,
+static void reachingDefAlgorithm(const MachineFunction &MF,
                                  InstrToInstrs *ColorOpToReachedUses,
                                  BlockToSetOfInstrsPerColor &In,
                                  BlockToSetOfInstrsPerColor &Out,
@@ -394,7 +392,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
   bool HasChanged;
   do {
     HasChanged = false;
-    for (MachineBasicBlock &MBB : MF) {
+    for (const MachineBasicBlock &MBB : MF) {
       unsigned CurReg;
       for (CurReg = 0; CurReg < NbReg; ++CurReg) {
         SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
@@ -403,7 +401,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
         SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
         unsigned Size = BBOutSet.size();
         // In[bb][color] = U Out[bb.predecessors][color]
-        for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+        for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
           SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
           BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
         }
@@ -435,7 +433,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
 /// @p DummyOp.
 /// \pre ColorOpToReachedUses is an array of at least number of registers of
 /// InstrToInstrs.
-static void reachingDef(MachineFunction &MF,
+static void reachingDef(const MachineFunction &MF,
                         InstrToInstrs *ColorOpToReachedUses,
                         const MapRegToId &RegToId, bool ADRPMode = false,
                         const MachineInstr *DummyOp = nullptr) {
@@ -985,7 +983,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
 /// Look for every register defined by potential LOHs candidates.
 /// Map these registers with dense id in @p RegToId and vice-versa in
 /// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
-static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
+static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
                                MapIdToReg &IdToReg,
                                const TargetRegisterInfo *TRI) {
   unsigned CurRegId = 0;
@@ -1026,8 +1024,7 @@ static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
 }

 bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
-  const TargetMachine &TM = MF.getTarget();
-  const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();

   MapRegToId RegToId;
@@ -1043,8 +1040,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {

   MachineInstr *DummyOp = nullptr;
   if (BasicBlockScopeOnly) {
-    const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
-        TM.getSubtargetImpl()->getInstrInfo());
+    const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
     // For local analysis, create a dummy operation to record uses that are not
     // local.
     DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 0fbd3c6..b9e41c6 100644
--- a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -67,6 +67,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -86,11 +87,12 @@ namespace {
 class AArch64ConditionOptimizer : public MachineFunctionPass {
   const TargetInstrInfo *TII;
   MachineDominatorTree *DomTree;
+  const MachineRegisterInfo *MRI;

 public:
   // Stores immediate, compare instruction opcode and branch condition (in this
   // order) of adjusted comparison.
-  typedef std::tuple<int, int, AArch64CC::CondCode> CmpInfo;
+  typedef std::tuple<int, unsigned, AArch64CC::CondCode> CmpInfo;

   static char ID;
   AArch64ConditionOptimizer() : MachineFunctionPass(ID) {}
@@ -116,7 +118,6 @@ void initializeAArch64ConditionOptimizerPass(PassRegistry &);
 INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizer, "aarch64-condopt",
                       "AArch64 CondOpt Pass", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 INITIALIZE_PASS_END(AArch64ConditionOptimizer, "aarch64-condopt",
                     "AArch64 CondOpt Pass", false, false)

@@ -127,8 +128,6 @@ FunctionPass *llvm::createAArch64ConditionOptimizerPass() {
 void AArch64ConditionOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineDominatorTree>();
   AU.addPreserved<MachineDominatorTree>();
-  AU.addRequired<LiveIntervals>();
-  AU.addPreserved<LiveIntervals>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }

@@ -155,7 +154,7 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
   // cmn is an alias for adds with a dead destination register.
   case AArch64::ADDSWri:
   case AArch64::ADDSXri:
-    if (I->getOperand(0).isDead())
+    if (MRI->use_empty(I->getOperand(0).getReg()))
       return I;

     DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
@@ -216,7 +215,7 @@ static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) {
 // operator and condition code.
 AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
     MachineInstr *CmpMI, AArch64CC::CondCode Cmp) {
-  int Opc = CmpMI->getOpcode();
+  unsigned Opc = CmpMI->getOpcode();

   // CMN (compare with negative immediate) is an alias to ADDS (as
   // "operand - negative" == "operand + positive")
@@ -245,7 +244,7 @@ AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
 void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
                                           const CmpInfo &Info) {
   int Imm;
-  int Opc;
+  unsigned Opc;
   AArch64CC::CondCode Cmp;
   std::tie(Imm, Opc, Cmp) = Info;

@@ -304,8 +303,9 @@ bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
 bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
   DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
                << "********** Function: " << MF.getName() << '\n');
-  TII = MF.getTarget().getSubtargetImpl()->getInstrInfo();
+  TII = MF.getSubtarget().getInstrInfo();
   DomTree = &getAnalysis<MachineDominatorTree>();
+  MRI = &MF.getRegInfo();

   bool Changed = false;

diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 54f53dc..2b0c92f 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -416,7 +416,7 @@ bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
     // We never speculate stores, so an AA pointer isn't necessary.
     bool DontMoveAcrossStore = true;
-    if (!I.isSafeToMove(TII, nullptr, DontMoveAcrossStore)) {
+    if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
       DEBUG(dbgs() << "Can't speculate: " << I);
       return false;
     }
@@ -893,15 +893,13 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
                << "********** Function: " << MF.getName() << '\n');
   TII = MF.getSubtarget().getInstrInfo();
   TRI = MF.getSubtarget().getRegisterInfo();
-  SchedModel =
-      MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
+  SchedModel = MF.getSubtarget().getSchedModel();
   MRI = &MF.getRegInfo();
   DomTree = &getAnalysis<MachineDominatorTree>();
   Loops = getAnalysisIfAvailable<MachineLoopInfo>();
   Traces = &getAnalysis<MachineTraceMetrics>();
   MinInstr = nullptr;
-  MinSize = MF.getFunction()->getAttributes().hasAttribute(
-      AttributeSet::FunctionIndex, Attribute::MinSize);
+  MinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);

   bool Changed = false;
   CmpConv.runOnMachineFunction(MF);
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index c850680..c2470f7 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -229,7 +229,7 @@ static bool isStartChunk(uint64_t Chunk) {
   if (Chunk == 0 || Chunk == UINT64_MAX)
     return false;

-  return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64;
+  return isMask_64(~Chunk);
 }

 /// \brief Check whether this chunk matches the pattern '0...1...' This pattern
@@ -239,7 +239,7 @@ static bool isEndChunk(uint64_t Chunk) {
   if (Chunk == 0 || Chunk == UINT64_MAX)
     return false;

-  return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64;
+  return isMask_64(Chunk);
 }

 /// \brief Clear or set all bits in the chunk at the given index.
@@ -698,12 +698,15 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
     return expandMOVImm(MBB, MBBI, 32);
   case AArch64::MOVi64imm:
     return expandMOVImm(MBB, MBBI, 64);
-  case AArch64::RET_ReallyLR:
-    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
-      .addReg(AArch64::LR);
+  case AArch64::RET_ReallyLR: {
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
+            .addReg(AArch64::LR);
+    transferImpOps(MI, MIB, MIB);
     MI.eraseFromParent();
     return true;
   }
+  }
   return false;
 }

diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index ca4e97b..9977e2b 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -245,9 +245,10 @@ public:
   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                            const TargetLibraryInfo *LibInfo)
       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
-    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+    Subtarget =
+        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
     Context = &FuncInfo.Fn->getContext();
   }

@@ -663,20 +664,22 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
       Addr.setExtendType(AArch64_AM::LSL);

     const Value *Src = U->getOperand(0);
-    if (const auto *I = dyn_cast<Instruction>(Src))
-      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
-        Src = I;
-
-    // Fold the zext or sext when it won't become a noop.
-    if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
-      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
-        Addr.setExtendType(AArch64_AM::UXTW);
-        Src = ZE->getOperand(0);
-      }
-    } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
-      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
-        Addr.setExtendType(AArch64_AM::SXTW);
-        Src = SE->getOperand(0);
+    if (const auto *I = dyn_cast<Instruction>(Src)) {
+      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+        // Fold the zext or sext when it won't become a noop.
+        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
+          if (!isIntExtFree(ZE) &&
+              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+            Addr.setExtendType(AArch64_AM::UXTW);
+            Src = ZE->getOperand(0);
+          }
+        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
+          if (!isIntExtFree(SE) &&
+              SE->getOperand(0)->getType()->isIntegerTy(32)) {
+            Addr.setExtendType(AArch64_AM::SXTW);
+            Src = SE->getOperand(0);
+          }
+        }
       }
     }

@@ -745,21 +748,22 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
       Addr.setExtendType(AArch64_AM::LSL);

     const Value *Src = LHS;
-    if (const auto *I = dyn_cast<Instruction>(Src))
-      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
-        Src = I;
-
-
-    // Fold the zext or sext when it won't become a noop.
-    if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
-      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
-        Addr.setExtendType(AArch64_AM::UXTW);
-        Src = ZE->getOperand(0);
-      }
-    } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
-      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
-        Addr.setExtendType(AArch64_AM::SXTW);
-        Src = SE->getOperand(0);
+    if (const auto *I = dyn_cast<Instruction>(Src)) {
+      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+        // Fold the zext or sext when it won't become a noop.
+        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
+          if (!isIntExtFree(ZE) &&
+              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+            Addr.setExtendType(AArch64_AM::UXTW);
+            Src = ZE->getOperand(0);
+          }
+        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
+          if (!isIntExtFree(SE) &&
+              SE->getOperand(0)->getType()->isIntegerTy(32)) {
+            Addr.setExtendType(AArch64_AM::SXTW);
+            Src = SE->getOperand(0);
+          }
+        }
       }
     }

@@ -1916,7 +1920,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
   // could select it. Emit a copy to subreg if necessary. FastISel will remove
   // it when it selects the integer extend.
   unsigned Reg = lookUpRegForValue(IntExtVal);
-  if (!Reg) {
+  auto *MI = MRI.getUniqueVRegDef(Reg);
+  if (!MI) {
     if (RetVT == MVT::i64 && VT <= MVT::i32) {
       if (WantZExt) {
         // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
@@ -1934,10 +1939,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
   // The integer extend has already been emitted - delete all the instructions
   // that have been emitted by the integer extend lowering code and use the
   // result from the load instruction directly.
-  while (Reg) {
-    auto *MI = MRI.getUniqueVRegDef(Reg);
-    if (!MI)
-      break;
+  while (MI) {
     Reg = 0;
     for (auto &Opnd : MI->uses()) {
       if (Opnd.isReg()) {
@@ -1946,6 +1948,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
       }
     }
     MI->eraseFromParent();
+    MI = nullptr;
+    if (Reg)
+      MI = MRI.getUniqueVRegDef(Reg);
   }
   updateValueMap(IntExtVal, ResultReg);
   return true;
@@ -2571,7 +2576,7 @@ bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
     Src1IsKill = true;
   }
-  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32spRegClass, Src1Reg,
+  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
                                        Src1IsKill, Src2Reg, Src2IsKill);
   updateValueMap(SI, ResultReg);
   return true;
@@ -2677,8 +2682,11 @@ bool AArch64FastISel::selectSelect(const Instruction *I) {
       return false;
     bool CondIsKill = hasTrivialKill(Cond);

+    const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
+    CondReg = constrainOperandRegClass(II, CondReg, 1);
+
     // Emit a TST instruction (ANDS wzr, reg, #imm).
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri),
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
            AArch64::WZR)
        .addReg(CondReg, getKillRegState(CondIsKill))
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
@@ -3033,6 +3041,11 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,

     // Copy all of the result registers out of their specified physreg.
     MVT CopyVT = RVLocs[0].getValVT();
+
+    // TODO: Handle big-endian results
+    if (CopyVT.isVector() && !Subtarget->isLittleEndian())
+      return false;
+
     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
             TII.get(TargetOpcode::COPY), ResultReg)
@@ -3157,7 +3170,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {

   // Add a register mask with the call-preserved registers.
   // Proper defs for return values will be added by setPhysRegsDeadExcept().
-  MIB.addRegMask(TRI.getCallPreservedMask(CC));
+  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

   CLI.Call = MIB;

@@ -3256,7 +3269,7 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
     std::swap(LHS, RHS);

   // Simplify multiplies.
-  unsigned IID = II->getIntrinsicID();
+  Intrinsic::ID IID = II->getIntrinsicID();
   switch (IID) {
   default:
     break;
@@ -3324,8 +3337,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     MFI->setFrameAddressIsTaken(true);

     const AArch64RegisterInfo *RegInfo =
-        static_cast<const AArch64RegisterInfo *>(
-            TM.getSubtargetImpl()->getRegisterInfo());
+        static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3525,7 +3537,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     std::swap(LHS, RHS);

   // Simplify multiplies.
-  unsigned IID = II->getIntrinsicID();
+  Intrinsic::ID IID = II->getIntrinsicID();
   switch (IID) {
   default:
     break;
@@ -3589,7 +3601,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
                   AArch64_AM::ASR, 31, /*WantResult=*/false);
     } else {
       assert(VT == MVT::i64 && "Unexpected value type.");
-      MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+      // LHSReg and RHSReg cannot be killed by this Mul, since they are
+      // reused in the next instruction.
+      MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
+                          /*IsKill=*/false);
       unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
                                       RHSReg, RHSIsKill);
       emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
@@ -3618,7 +3633,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
                             AArch64::sub_32);
     } else {
       assert(VT == MVT::i64 && "Unexpected value type.");
-      MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+      // LHSReg and RHSReg cannot be killed by this Mul, since they are
+      // reused in the next instruction.
+      MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
+                          /*IsKill=*/false);
       unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
                                       RHSReg, RHSIsKill);
       emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
@@ -4563,7 +4581,7 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
     unsigned ResultReg = 0;
     uint64_t ShiftVal = C->getZExtValue();
     MVT SrcVT = RetVT;
-    bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
+    bool IsZExt = I->getOpcode() != Instruction::AShr;
     const Value *Op0 = I->getOperand(0);
     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
       if (!isIntExtFree(ZExt)) {
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index d8e9156..3ba7e70 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -9,6 +9,82 @@
 //
 // This file contains the AArch64 implementation of TargetFrameLowering class.
 //
+// On AArch64, stack frames are structured as follows:
+//
+// The stack grows downward.
+//
+// All of the individual frame areas on the frame below are optional, i.e. it's
+// possible to create a function so that the particular area isn't present
+// in the frame.
+//
+// At function entry, the "frame" looks as follows:
+//
+// |                                   | Higher address
+// |-----------------------------------|
+// |                                   |
+// | arguments passed on the stack     |
+// |                                   |
+// |-----------------------------------| <- sp
+// |                                   | Lower address
+//
+//
+// After the prologue has run, the frame has the following general structure.
+// Note that this doesn't depict the case where a red-zone is used. Also,
+// technically the last frame area (VLAs) doesn't get created until in the
+// main function body, after the prologue is run. However, it's depicted here
+// for completeness.
+//
+// |                                   | Higher address
+// |-----------------------------------|
+// |                                   |
+// | arguments passed on the stack     |
+// |                                   |
+// |-----------------------------------|
+// |                                   |
+// | prev_fp, prev_lr                  |
+// | (a.k.a. "frame record")           |
+// |-----------------------------------| <- fp(=x29)
+// |                                   |
+// | other callee-saved registers      |
+// |                                   |
+// |-----------------------------------|
+// |.empty.space.to.make.part.below....|
+// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
+// |.the.standard.16-byte.alignment....|  compile time; if present)
+// |-----------------------------------|
+// |                                   |
+// | local variables of fixed size     |
+// | including spill slots             |
+// |-----------------------------------| <- bp(not defined by ABI,
+// |.variable-sized.local.variables....|       LLVM chooses X19)
+// |.(VLAs)............................| (size of this area is unknown at
+// |...................................|  compile time)
+// |-----------------------------------| <- sp
+// |                                   | Lower address
+//
+//
+// To access the data in a frame, at-compile time, a constant offset must be
+// computable from one of the pointers (fp, bp, sp) to access it. The size
+// of the areas with a dotted background cannot be computed at compile-time
+// if they are present, making it required to have all three of fp, bp and
+// sp to be set up to be able to access all contents in the frame areas,
+// assuming all of the frame areas are non-empty.
+//
+// For most functions, some of the frame areas are empty. For those functions,
+// it may not be necessary to set up fp or bp:
+// * A base pointer is definitly needed when there are both VLAs and local
+//   variables with more-than-default alignment requirements.
+// * A frame pointer is definitly needed when there are local variables with
+//   more-than-default alignment requirements.
+//
+// In some cases when a base pointer is not strictly needed, it is generated
+// anyway when offsets from the frame pointer to access local variables become
+// so large that the offset can't be encoded in the immediate fields of loads
+// or stores.
+//
+// FIXME: also explain the redzone concept.
+// FIXME: also explain the concept of reserved call frames.
+//
 //===----------------------------------------------------------------------===//

 #include "AArch64FrameLowering.h"
@@ -39,33 +115,12 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",

 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

-static unsigned estimateStackSize(MachineFunction &MF) {
-  const MachineFrameInfo *FFI = MF.getFrameInfo();
-  int Offset = 0;
-  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
-    int FixedOff = -FFI->getObjectOffset(i);
-    if (FixedOff > Offset)
-      Offset = FixedOff;
-  }
-  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
-    if (FFI->isDeadObjectIndex(i))
-      continue;
-    Offset += FFI->getObjectSize(i);
-    unsigned Align = FFI->getObjectAlignment(i);
-    // Adjust to alignment boundary
-    Offset = (Offset + Align - 1) / Align * Align;
-  }
-  // This does not include the 16 bytes used for fp and lr.
-  return (unsigned)Offset;
-}
-
 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
   if (!EnableRedZone)
     return false;
   // Don't use the red zone if the function explicitly asks us not to.
   // This is typically used for kernel code.
-  if (MF.getFunction()->getAttributes().hasAttribute(
-          AttributeSet::FunctionIndex, Attribute::NoRedZone))
+  if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
     return false;

   const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -84,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
 /// pointer register.
 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-#ifndef NDEBUG
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
-  assert(!RegInfo->needsStackRealignment(MF) &&
-         "No stack realignment on AArch64!");
-#endif
-
   return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
           MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
-          MFI->hasPatchPoint());
+          MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF));
 }

 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -112,7 +161,7 @@ void AArch64FrameLowering::eliminateCallFramePseudoInstr(
   const AArch64InstrInfo *TII =
       static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
   DebugLoc DL = I->getDebugLoc();
-  int Opc = I->getOpcode();
+  unsigned Opc = I->getOpcode();
   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

@@ -167,7 +216,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
   if (CSI.empty())
     return;

-  const DataLayout *TD = MF.getSubtarget().getDataLayout();
+  const DataLayout *TD = MF.getTarget().getDataLayout();
   bool HasFP = hasFP(MF);

   // Calculate amount of bytes used for return address storing.
@@ -201,8 +250,33 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
   }
 }

-void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+/// Get FPOffset by analyzing the first instruction.
+static int getFPOffsetInPrologue(MachineInstr *MBBI) {
+  // First instruction must a) allocate the stack and b) have an immediate
+  // that is a multiple of -2.
+  assert(((MBBI->getOpcode() == AArch64::STPXpre ||
+           MBBI->getOpcode() == AArch64::STPDpre) &&
+          MBBI->getOperand(3).getReg() == AArch64::SP &&
+          MBBI->getOperand(4).getImm() < 0 &&
+          (MBBI->getOperand(4).getImm() & 1) == 0));
+
+  // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
+  // required for the callee saved register area we get the frame pointer
+  // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
+  int FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
+  assert(FPOffset >= 0 && "Bad Framepointer Offset");
+  return FPOffset;
+}
+
+static bool isCSSave(MachineInstr *MBBI) {
+  return MBBI->getOpcode() == AArch64::STPXi ||
+         MBBI->getOpcode() == AArch64::STPDi ||
+         MBBI->getOpcode() == AArch64::STPXpre ||
+         MBBI->getOpcode() == AArch64::STPDpre;
+}
+
+void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
+                                        MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.begin();
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   const Function *Fn = MF.getFunction();
@@ -228,7 +302,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
   AFI->setLocalStackSize(NumBytes);

   // Label used to tie together the PROLOG_LABEL and the MachineMoves.
-  MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+  MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();

   // REDZONE: If the stack size is less than 128 bytes, we don't need
   // to actually allocate.
@@ -251,27 +325,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {

   // Only set up FP if we actually need to.
   int FPOffset = 0;
-  if (HasFP) {
-    // First instruction must a) allocate the stack and b) have an immediate
-    // that is a multiple of -2.
-    assert((MBBI->getOpcode() == AArch64::STPXpre ||
-            MBBI->getOpcode() == AArch64::STPDpre) &&
-           MBBI->getOperand(3).getReg() == AArch64::SP &&
-           MBBI->getOperand(4).getImm() < 0 &&
-           (MBBI->getOperand(4).getImm() & 1) == 0);
-
-    // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
-    // required for the callee saved register area we get the frame pointer
-    // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
-    FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
-    assert(FPOffset >= 0 && "Bad Framepointer Offset");
-  }
+  if (HasFP)
+    FPOffset = getFPOffsetInPrologue(MBBI);

   // Move past the saves of the callee-saved registers.
-  while (MBBI->getOpcode() == AArch64::STPXi ||
-         MBBI->getOpcode() == AArch64::STPDi ||
-         MBBI->getOpcode() == AArch64::STPXpre ||
-         MBBI->getOpcode() == AArch64::STPDpre) {
+  while (isCSSave(MBBI)) {
     ++MBBI;
     NumBytes -= 16;
   }
@@ -289,11 +347,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
   AFI->setLocalStackSize(NumBytes);

   // Allocate space for the rest of the frame.
-  if (NumBytes) {
-    // If we're a leaf function, try using the red zone.
-    if (!canUseRedZone(MF))
-      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
-                      MachineInstr::FrameSetup);
+
+  const unsigned Alignment = MFI->getMaxAlignment();
+  const bool NeedsRealignment = (Alignment > 16);
+  unsigned scratchSPReg = AArch64::SP;
+  if (NeedsRealignment) {
+    // Use the first callee-saved register as a scratch register
+    assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) &&
+           "No scratch register to align SP!");
+    scratchSPReg = AArch64::X9;
+  }
+
+  // If we're a leaf function, try using the red zone.
+  if (NumBytes && !canUseRedZone(MF))
+    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+    // the correct value here, as NumBytes also includes padding bytes,
+    // which shouldn't be counted here.
+    emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
+                    MachineInstr::FrameSetup);
+
+  assert(!(NeedsRealignment && NumBytes==0) &&
+         "NumBytes should never be 0 when realignment is needed");
+
+  if (NumBytes && NeedsRealignment) {
+    const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+    assert(NrBitsToZero > 1);
+    assert(scratchSPReg != AArch64::SP);
+
+    // SUB X9, SP, NumBytes
+    //   -- X9 is temporary register, so shouldn't contain any live data here,
+    //   -- free to use. This is already produced by emitFrameOffset above.
+    // AND SP, X9, 0b11111...0000
+    // The logical immediates have a non-trivial encoding. The following
+    // formula computes the encoded immediate with all ones but
+    // NrBitsToZero zero bits as least significant bits.
+    uint32_t andMaskEncoded =
+        (1 <<12) // = N
+        | ((64-NrBitsToZero) << 6) // immr
+        | ((64-NrBitsToZero-1) << 0) // imms
+        ;
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+        .addReg(scratchSPReg, RegState::Kill)
+        .addImm(andMaskEncoded);
+  }

   // If we need a base pointer, set it up here. It's whatever the value of the
   // stack pointer is at this point. Any variable size objects will be allocated
   // after this, so we can still use the base pointer to reference locals.

   // FIXME: Clarify FrameSetup flags here.
   // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
   // needed.
- // - if (RegInfo->hasBasePointer(MF)) - TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false); + if (RegInfo->hasBasePointer(MF)) { + TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, + false); + } if (needsFrameMoves) { - const DataLayout *TD = MF.getSubtarget().getDataLayout(); + const DataLayout *TD = MF.getTarget().getDataLayout(); const int StackGrowth = -TD->getPointerSize(0); unsigned FramePtr = RegInfo->getFrameRegister(MF); - // An example of the prologue: // // .globl __foo @@ -444,15 +539,19 @@ static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); MachineFrameInfo *MFI = MF.getFrameInfo(); const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( MF.getSubtarget().getRegisterInfo()); - DebugLoc DL = MBBI->getDebugLoc(); - unsigned RetOpcode = MBBI->getOpcode(); - + DebugLoc DL; + bool IsTailCallReturn = false; + if (MBB.end() != MBBI) { + DL = MBBI->getDebugLoc(); + unsigned RetOpcode = MBBI->getOpcode(); + IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || + RetOpcode == AArch64::TCRETURNri; + } int NumBytes = MFI->getStackSize(); const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); @@ -461,10 +560,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; - // Initial and residual are named for consitency with the prologue. Note that + // Initial and residual are named for consistency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; - if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) { + if (IsTailCallReturn) { MachineOperand &StackAdjust = MBBI->getOperand(1); // For a tail-call in a callee-pops-arguments environment, some or all of @@ -509,7 +608,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, unsigned NumRestores = 0; // Move past the restores of the callee-saved registers. - MachineBasicBlock::iterator LastPopI = MBBI; + MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); if (LastPopI != MBB.begin()) { do { @@ -572,9 +671,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, bool isFixed = MFI->isFixedObjectIndex(FI); // Use frame pointer to reference fixed objects. Use it for locals if - // there are VLAs (and thus the SP isn't reliable as a base). - // Make sure useFPForScavengingIndex() does the right thing for the emergency - // spill slot. + // there are VLAs or a dynamically realigned SP (and thus the SP isn't + // reliable as a base). Make sure useFPForScavengingIndex() does the + // right thing for the emergency spill slot. bool UseFP = false; if (AFI->hasStackFrame()) { // Note: Keeping the following as multiple 'if' statements rather than @@ -583,7 +682,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, // Argument access should always use the FP. 
if (isFixed) { UseFP = hasFP(MF); - } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) { + } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) && + !RegInfo->needsStackRealignment(MF)) { // Use SP or FP, whichever gives us the best chance of the offset // being in range for direct access. If the FPOffset is positive, // that'll always be best, as the SP will be even further away. @@ -599,6 +699,10 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, } } + assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) && + "In the presence of dynamic stack pointer realignment, " + "non-argument objects cannot be accessed through the frame pointer"); + if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; @@ -696,6 +800,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre) MIB.addReg(AArch64::SP, RegState::Define); + MBB.addLiveIn(Reg1); + MBB.addLiveIn(Reg2); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) .addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) @@ -795,6 +901,9 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( if (RegInfo->hasBasePointer(MF)) MRI->setPhysRegUsed(RegInfo->getBaseRegister()); + if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF)) + MRI->setPhysRegUsed(AArch64::X9); + // If any callee-saved registers are used, the frame cannot be eliminated. unsigned NumGPRSpilled = 0; unsigned NumFPRSpilled = 0; @@ -868,7 +977,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); + unsigned CFSize = + MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); bool BigStack = (CFSize >= 256); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index df3875f..b496fcc 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -22,7 +22,7 @@ class AArch64FrameLowering : public TargetFrameLowering { public: explicit AArch64FrameLowering() : TargetFrameLowering(StackGrowsDown, 16, 0, 16, - false /*StackRealignable*/) {} + true /*StackRealignable*/) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -34,7 +34,7 @@ public: /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
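Taken together, the resolveFrameIndexReference changes implement a base-register policy: incoming arguments stay FP-relative, and once the SP has been dynamically realigned, locals must not be addressed off the FP (the distance between the FP and the realigned SP is unknown at compile time). A simplified decision sketch; the predicates and the in-range tie-break are stand-ins for the real MachineFrameInfo/RegisterInfo queries:

#include <cstdio>

// Simplified stand-in for the base-register choice in
// resolveFrameIndexReference. HasFP/HasBasePtr/NeedsRealign mirror hasFP(),
// hasBasePointer() and needsStackRealignment(); offset range checks elided.
struct FrameCtx { bool HasFP, HasBasePtr, NeedsRealign; };

static const char *baseRegFor(const FrameCtx &C, bool IsFixedObject) {
  if (IsFixedObject)                 // arguments: FP-relative if we have one
    return C.HasFP ? "FP" : "SP";
  if (C.HasFP && !C.HasBasePtr && !C.NeedsRealign)
    return "FP or SP, whichever offset is in range";
  // After dynamic realignment the FP-to-local distance is variable, so
  // locals go through the base pointer (x19) or the realigned SP.
  return C.HasBasePtr ? "BP (x19)" : "SP";
}

int main() {
  FrameCtx Realigned = {true, false, true};
  printf("local: %s\n", baseRegFor(Realigned, false)); // SP
  printf("arg:   %s\n", baseRegFor(Realigned, true));  // FP
}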
- void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index bb2e1e2..78a2021 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -53,12 +53,10 @@ public: } bool runOnMachineFunction(MachineFunction &MF) override { - AttributeSet FnAttrs = MF.getFunction()->getAttributes(); ForCodeSize = - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize) || - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); - Subtarget = &TM.getSubtarget<AArch64Subtarget>(); + MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) || + MF.getFunction()->hasFnAttribute(Attribute::MinSize); + Subtarget = &MF.getSubtarget<AArch64Subtarget>(); return SelectionDAGISel::runOnMachineFunction(MF); } @@ -67,7 +65,7 @@ public: /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) override; SDNode *SelectMLAV64LaneV128(SDNode *N); @@ -134,8 +132,8 @@ public: /// Generic helper for the createDTuple/createQTuple /// functions. Those should almost always be called instead. - SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[], - unsigned SubRegs[]); + SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[], + const unsigned SubRegs[]); SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); @@ -213,13 +211,20 @@ static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, } bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( - const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { - assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); - // Require the address to be in a register. That is safe for all AArch64 - // variants and it is hard to do anything much smarter without knowing - // how the operand is used. - OutOps.push_back(Op); - return false; + const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { + switch(ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_Q: + // Require the address to be in a register. That is safe for all AArch64 + // variants and it is hard to do anything much smarter without knowing + // how the operand is used. 
+ OutOps.push_back(Op); + return false; + } + return true; } /// SelectArithImmed - Select an immediate value that can be represented as @@ -247,8 +252,9 @@ bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, return false; unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); - Val = CurDAG->getTargetConstant(Immed, MVT::i32); - Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); + SDLoc dl(N); + Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); + Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); return true; } @@ -281,7 +287,8 @@ bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, return false; Immed &= 0xFFFFFFULL; - return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift); + return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, + Shift); } /// getShiftTypeForNode - Translate a shift node to the corresponding @@ -301,7 +308,7 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { } } -/// \brief Determine wether it is worth to fold V into an extended register. +/// \brief Determine whether it is worth to fold V into an extended register. bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { // it hurts if the value is used at least twice, unless we are optimizing // for code size. @@ -329,7 +336,7 @@ bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); Reg = N.getOperand(0); - Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); + Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); return isWorthFolding(N); } @@ -430,6 +437,7 @@ static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, /// is a lane in the upper half of a 128-bit vector. Recognize and select this /// so that we don't emit unnecessary lane extracts. SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { + SDLoc dl(N); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. 
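For reference, SelectArithImmed (whose getTargetConstant calls gain SDLoc arguments in this hunk) accepts exactly the two ADD/SUB immediate shapes: a 12-bit value, optionally shifted left by 12. A standalone sketch of that legality split:

#include <cstdint>
#include <cstdio>

// Sketch of the legality test behind SelectArithImmed: an ADD/SUB immediate
// is a 12-bit value with an optional LSL #12. The (Val, Shift) split mirrors
// what the selector materializes as target constants.
static bool selectArithImmed(uint64_t Immed, unsigned &Val, unsigned &Shift) {
  if ((Immed >> 12) == 0) {                // fits in the low 12 bits
    Val = (unsigned)Immed;
    Shift = 0;
    return true;
  }
  if ((Immed & 0xfff) == 0 && (Immed >> 24) == 0) { // 12 bits << 12
    Val = (unsigned)(Immed >> 12);
    Shift = 12;
    return true;
  }
  return false;                            // needs a MOV/MOVK sequence instead
}

int main() {
  unsigned V, S;
  printf("4095:   %d\n", selectArithImmed(4095, V, S));   // 1, shift 0
  printf("0x5000: %d\n", selectArithImmed(0x5000, V, S)); // 1, shift 12
  printf("0x1001: %d\n", selectArithImmed(0x1001, V, S)); // 0
}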
@@ -446,7 +454,7 @@ SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { return nullptr; } - SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); + SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; @@ -469,10 +477,11 @@ SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { break; } - return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops); + return CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops); } SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { + SDLoc dl(N); SDValue SMULLOp0; SDValue SMULLOp1; int LaneIdx; @@ -481,7 +490,7 @@ SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { LaneIdx)) return nullptr; - SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); + SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; @@ -512,7 +521,7 @@ SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { } else llvm_unreachable("Unrecognized intrinsic."); - return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops); + return CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops); } /// Instructions that accept extend modifiers like UXTW expect the register @@ -523,9 +532,10 @@ static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { if (N.getValueType() == MVT::i32) return N; - SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); + SDLoc dl(N); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - SDLoc(N), MVT::i32, N, SubReg); + dl, MVT::i32, N, SubReg); return SDValue(Node, 0); } @@ -565,7 +575,8 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); Reg = narrowIfNeeded(CurDAG, Reg); - Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); + Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), + MVT::i32); return isWorthFolding(N); } @@ -595,11 +606,12 @@ static bool isWorthFoldingADDlow(SDValue N) { /// reference, which determines the scale. 
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm) { + SDLoc dl(N); const TargetLowering *TLI = getTargetLowering(); if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - OffImm = CurDAG->getTargetConstant(0, MVT::i64); + OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); return true; } @@ -632,7 +644,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, int FI = cast<FrameIndexSDNode>(Base)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } - OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64); + OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); return true; } } @@ -648,7 +660,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, // add x0, Xbase, #offset // ldr x0, [x0] Base = N; - OffImm = CurDAG->getTargetConstant(0, MVT::i64); + OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); return true; } @@ -675,7 +687,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, const TargetLowering *TLI = getTargetLowering(); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } - OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64); + OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); return true; } } @@ -683,12 +695,12 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, } static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { - SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); + SDLoc dl(N); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); SDValue ImpDef = SDValue( - CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64), - 0); + CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); MachineSDNode *Node = CurDAG->getMachineNode( - TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg); + TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg); return SDValue(Node, 0); } @@ -702,6 +714,7 @@ bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) return false; + SDLoc dl(N); if (WantExtend) { AArch64_AM::ShiftExtendType Ext = getExtendTypeForNode(N.getOperand(0), true); @@ -709,10 +722,11 @@ bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, return false; Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); - SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); + SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, + MVT::i32); } else { Offset = N.getOperand(0); - SignExtend = CurDAG->getTargetConstant(0, MVT::i32); + SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); } unsigned LegalShiftVal = Log2_32(Size); @@ -735,6 +749,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, return false; SDValue LHS = N.getOperand(0); SDValue RHS = N.getOperand(1); + SDLoc dl(N); // We don't want to match immediate adds here, because they are better lowered // to the register-immediate addressing modes. 
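SelectAddrModeIndexed feeds the scaled unsigned-offset forms of LDR/STR, and the RHSC >> Scale it emits is only legal when the byte offset divides evenly by the access size and the quotient fits in 12 bits. A minimal sketch of that test (the real selector also falls back to the unscaled 9-bit signed form handled by SelectAddrModeUnscaled):

#include <cstdint>
#include <cstdio>

// Sketch of the immediate test in SelectAddrModeIndexed: LDR/STR with an
// unsigned scaled offset accepts base + imm12 * Size, imm12 in [0, 4095],
// where Size is the access width in bytes.
static bool fitsScaledOffset(int64_t Off, unsigned Size, int64_t &Imm12) {
  if (Off < 0 || (Off % Size) != 0)
    return false;                     // unscaled LDUR/STUR territory
  Imm12 = Off / Size;
  return Imm12 <= 4095;
}

int main() {
  int64_t Imm;
  printf("%d\n", fitsScaledOffset(32760, 8, Imm)); // 1: ldr x0, [x1, #32760]
  printf("%d\n", fitsScaledOffset(4, 8, Imm));     // 0: not 8-byte multiple
  printf("%d\n", fitsScaledOffset(-8, 8, Imm));    // 0: negative -> LDUR
}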
@@ -757,7 +772,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { Base = LHS; - DoShift = CurDAG->getTargetConstant(true, MVT::i32); + DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); return true; } @@ -765,12 +780,12 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { Base = RHS; - DoShift = CurDAG->getTargetConstant(true, MVT::i32); + DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); return true; } // There was no shift, whatever else we find. - DoShift = CurDAG->getTargetConstant(false, MVT::i32); + DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; // Try to match an unshifted extend on the LHS. @@ -779,7 +794,8 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, AArch64_AM::InvalidShiftExtend) { Base = RHS; Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); - SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); + SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, + MVT::i32); if (isWorthFolding(LHS)) return true; } @@ -790,7 +806,8 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, AArch64_AM::InvalidShiftExtend) { Base = LHS; Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); - SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); + SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, + MVT::i32); if (isWorthFolding(RHS)) return true; } @@ -821,6 +838,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, return false; SDValue LHS = N.getOperand(0); SDValue RHS = N.getOperand(1); + SDLoc DL(N); // Check if this particular node is reused in any non-memory related // operation. If yes, do not try to fold this node into the address @@ -843,7 +861,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, // MOV X0, WideImmediate // LDR X2, [BaseReg, X0] if (isa<ConstantSDNode>(RHS)) { - int64_t ImmOff = (int64_t)dyn_cast<ConstantSDNode>(RHS)->getZExtValue(); + int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); unsigned Scale = Log2_32(Size); // Skip the immediate can be seleced by load/store addressing mode. 
// Also skip the immediate can be encoded by a single ADD (SUB is also @@ -852,7 +870,6 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) return false; - SDLoc DL(N.getNode()); SDValue Ops[] = { RHS }; SDNode *MOVI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); @@ -868,7 +885,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { Base = LHS; - DoShift = CurDAG->getTargetConstant(true, MVT::i32); + DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); return true; } @@ -876,40 +893,40 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { Base = RHS; - DoShift = CurDAG->getTargetConstant(true, MVT::i32); + DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); return true; } // Match any non-shifted, non-extend, non-immediate add expression. Base = LHS; Offset = RHS; - SignExtend = CurDAG->getTargetConstant(false, MVT::i32); - DoShift = CurDAG->getTargetConstant(false, MVT::i32); + SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); + DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); // Reg1 + Reg2 is free: no check needed. return true; } SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { - static unsigned RegClassIDs[] = { + static const unsigned RegClassIDs[] = { AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; - static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1, - AArch64::dsub2, AArch64::dsub3 }; + static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2, AArch64::dsub3}; return createTuple(Regs, RegClassIDs, SubRegs); } SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { - static unsigned RegClassIDs[] = { + static const unsigned RegClassIDs[] = { AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; - static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1, - AArch64::qsub2, AArch64::qsub3 }; + static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3}; return createTuple(Regs, RegClassIDs, SubRegs); } SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, - unsigned RegClassIDs[], - unsigned SubRegs[]) { + const unsigned RegClassIDs[], + const unsigned SubRegs[]) { // There's no special register-class for a vector-list of 1 element: it's just // a vector. if (Regs.size() == 1) @@ -917,18 +934,18 @@ SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, assert(Regs.size() >= 2 && Regs.size() <= 4); - SDLoc DL(Regs[0].getNode()); + SDLoc DL(Regs[0]); SmallVector<SDValue, 4> Ops; // First operand of REG_SEQUENCE is the desired RegClass. Ops.push_back( - CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32)); + CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); // Then we get pairs of source & subregister-position for the components. 
for (unsigned i = 0; i < Regs.size(); ++i) { Ops.push_back(Regs[i]); - Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32)); + Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); } SDNode *N = @@ -1025,19 +1042,21 @@ SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { SDValue Base = LD->getBasePtr(); ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); int OffsetVal = (int)OffsetOp->getZExtValue(); - SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64); + SDLoc dl(N); + SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); SDValue Ops[] = { Base, Offset, Chain }; - SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT, + SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, MVT::Other, Ops); // Either way, we're replacing the node, so tell the caller that. Done = true; SDValue LoadedVal = SDValue(Res, 1); if (InsertTo64) { - SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); LoadedVal = SDValue(CurDAG->getMachineNode( - AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64, - CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg), + AArch64::SUBREG_TO_REG, dl, MVT::i64, + CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, + SubReg), 0); } @@ -1054,13 +1073,10 @@ SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, EVT VT = N->getValueType(0); SDValue Chain = N->getOperand(0); - SmallVector<SDValue, 6> Ops; - Ops.push_back(N->getOperand(2)); // Mem operand; - Ops.push_back(Chain); + SDValue Ops[] = {N->getOperand(2), // Mem operand; + Chain}; - std::vector<EVT> ResTys; - ResTys.push_back(MVT::Untyped); - ResTys.push_back(MVT::Other); + const EVT ResTys[] = {MVT::Untyped, MVT::Other}; SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); SDValue SuperReg = SDValue(Ld, 0); @@ -1078,15 +1094,12 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, EVT VT = N->getValueType(0); SDValue Chain = N->getOperand(0); - SmallVector<SDValue, 6> Ops; - Ops.push_back(N->getOperand(1)); // Mem operand - Ops.push_back(N->getOperand(2)); // Incremental - Ops.push_back(Chain); + SDValue Ops[] = {N->getOperand(1), // Mem operand + N->getOperand(2), // Incremental + Chain}; - std::vector<EVT> ResTys; - ResTys.push_back(MVT::i64); // Type of the write back register - ResTys.push_back(MVT::Untyped); - ResTys.push_back(MVT::Other); + const EVT ResTys[] = {MVT::i64, // Type of the write back register + MVT::Untyped, MVT::Other}; SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); @@ -1117,10 +1130,7 @@ SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); SDValue RegSeq = Is128Bit ? 
createQTuple(Regs) : createDTuple(Regs); - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(NumVecs + 2)); - Ops.push_back(N->getOperand(0)); + SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); return St; @@ -1130,25 +1140,24 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); - SmallVector<EVT, 2> ResTys; - ResTys.push_back(MVT::i64); // Type of the write back register - ResTys.push_back(MVT::Other); // Type for the Chain + const EVT ResTys[] = {MVT::i64, // Type of the write back register + MVT::Other}; // Type for the Chain // Form a REG_SEQUENCE to force register allocation. bool Is128Bit = VT.getSizeInBits() == 128; SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(NumVecs + 1)); // base register - Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental - Ops.push_back(N->getOperand(0)); // Chain + SDValue Ops[] = {RegSeq, + N->getOperand(NumVecs + 1), // base register + N->getOperand(NumVecs + 2), // Incremental + N->getOperand(0)}; // Chain SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); return St; } +namespace { /// WidenVector - Given a value in the V64 register class, produce the /// equivalent value in the V128 register class. class WidenVector { @@ -1169,6 +1178,7 @@ public: return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); } }; +} // namespace /// NarrowVector - Given a value in the V128 register class, produce the /// equivalent value in the V64 register class. 
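The createDTuple/createQTuple tables made const in this hunk drive the REG_SEQUENCE that glues a vector list into one super-register. A toy illustration of the indexing convention; the names are stand-ins for AArch64::DDRegClassID and friends:

#include <cstdio>

// Sketch of the lookup tables behind createDTuple/createQTuple: a list of
// N 64-bit vector registers picks the N-tuple register class through
// RegClassIDs[N - 2] and tags each element with SubRegs[i].
static const char *RegClassNames[] = {"DD", "DDD", "DDDD"};
static const char *SubRegNames[] = {"dsub0", "dsub1", "dsub2", "dsub3"};

static void describeTuple(unsigned NumRegs) {
  if (NumRegs < 2 || NumRegs > 4) {
    printf("single register: no REG_SEQUENCE needed\n");
    return;
  }
  printf("class %s:", RegClassNames[NumRegs - 2]);
  for (unsigned i = 0; i < NumRegs; ++i)
    printf(" %s", SubRegNames[i]);
  printf("\n");
}

int main() {
  describeTuple(1); // plain vector, returned as-is
  describeTuple(3); // DDD class with dsub0..dsub2
}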
@@ -1197,18 +1207,13 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, SDValue RegSeq = createQTuple(Regs); - std::vector<EVT> ResTys; - ResTys.push_back(MVT::Untyped); - ResTys.push_back(MVT::Other); + const EVT ResTys[] = {MVT::Untyped, MVT::Other}; unsigned LaneNo = cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); - Ops.push_back(N->getOperand(NumVecs + 3)); - Ops.push_back(N->getOperand(0)); + SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), + N->getOperand(NumVecs + 3), N->getOperand(0)}; SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); SDValue SuperReg = SDValue(Ld, 0); @@ -1242,20 +1247,18 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, SDValue RegSeq = createQTuple(Regs); - std::vector<EVT> ResTys; - ResTys.push_back(MVT::i64); // Type of the write back register - ResTys.push_back(MVT::Untyped); - ResTys.push_back(MVT::Other); + const EVT ResTys[] = {MVT::i64, // Type of the write back register + RegSeq->getValueType(0), MVT::Other}; unsigned LaneNo = cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number - Ops.push_back(N->getOperand(NumVecs + 2)); // Base register - Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental - Ops.push_back(N->getOperand(0)); + SDValue Ops[] = {RegSeq, + CurDAG->getTargetConstant(LaneNo, dl, + MVT::i64), // Lane Number + N->getOperand(NumVecs + 2), // Base register + N->getOperand(NumVecs + 3), // Incremental + N->getOperand(0)}; SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); // Update uses of the write back register @@ -1303,11 +1306,8 @@ SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned LaneNo = cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); - Ops.push_back(N->getOperand(NumVecs + 3)); - Ops.push_back(N->getOperand(0)); + SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), + N->getOperand(NumVecs + 3), N->getOperand(0)}; SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); // Transfer memoperands. @@ -1333,19 +1333,16 @@ SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, SDValue RegSeq = createQTuple(Regs); - SmallVector<EVT, 2> ResTys; - ResTys.push_back(MVT::i64); // Type of the write back register - ResTys.push_back(MVT::Other); + const EVT ResTys[] = {MVT::i64, // Type of the write back register + MVT::Other}; unsigned LaneNo = cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); - Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register - Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental - Ops.push_back(N->getOperand(0)); + SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), + N->getOperand(NumVecs + 2), // Base Register + N->getOperand(NumVecs + 3), // Incremental + N->getOperand(0)}; SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); // Transfer memoperands. 
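The post-indexed variants in these hunks (SelectPostLoad, SelectPostLoadLane, SelectPostStore, SelectPostStoreLane) all carry an extra MVT::i64 result for the write-back register. A toy model of that post-increment contract, where the operation returns both the data and the advanced base:

#include <cstdint>
#include <cstdio>

// Toy model of post-indexed addressing: the instruction yields the loaded
// value and the written-back base (base + increment), which is why the
// machine nodes above list MVT::i64 first among their result types.
struct PostLoadResult { uint64_t Value; const uint64_t *NewBase; };

static PostLoadResult ld1Post(const uint64_t *Base, long Inc) {
  return { *Base, Base + Inc };   // load, then advance the base register
}

int main() {
  uint64_t Buf[4] = {10, 20, 30, 40};
  const uint64_t *P = Buf;
  for (int i = 0; i < 3; ++i) {
    PostLoadResult R = ld1Post(P, 1); // like "ld1 {v0.1d}, [x0], #8"
    printf("%llu\n", (unsigned long long)R.Value);
    P = R.NewBase;
  }
}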
@@ -1424,12 +1421,17 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, } else return false; - assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) && - "bad amount in shift node!"); + // Bail out on large immediates. This happens when no proper + // combining/constant folding was performed. + if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) { + DEBUG((dbgs() << N + << ": Found large shift immediate, this should not happen\n")); + return false; + } LSB = Srl_imm; - MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm) - : CountTrailingOnes_64(And_imm)) - + MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm) + : countTrailingOnes<uint64_t>(And_imm)) - 1; if (ClampMSB) // Since we're moving the extend before the right shift operation, we need @@ -1473,7 +1475,7 @@ static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, return false; // Check whether we really have several bits extract here. - unsigned BitWide = 64 - CountLeadingOnes_64(~(And_mask >> Srl_imm)); + unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm)); if (BitWide && isMask_64(And_mask >> Srl_imm)) { if (N->getValueType(0) == MVT::i32) Opc = AArch64::UBFMWri; @@ -1529,7 +1531,14 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, } else return false; - assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!"); + // Missing combines/constant folding may have left us with strange + // constants. + if (Shl_imm >= VT.getSizeInBits()) { + DEBUG((dbgs() << N + << ": Found large shift immediate, this should not happen\n")); + return false; + } + uint64_t Srl_imm = 0; if (!isIntImmediate(N->getOperand(1), Srl_imm)) return false; @@ -1596,23 +1605,24 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { return nullptr; EVT VT = N->getValueType(0); + SDLoc dl(N); // If the bit extract operation is 64bit but the original type is 32bit, we // need to add one EXTRACT_SUBREG. if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { - SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64), - CurDAG->getTargetConstant(MSB, MVT::i64)}; + SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, MVT::i64), + CurDAG->getTargetConstant(MSB, dl, MVT::i64)}; - SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64); - SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); + SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); MachineSDNode *Node = - CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, + CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32, SDValue(BFM, 0), SubReg); return Node; } - SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT), - CurDAG->getTargetConstant(MSB, VT)}; + SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, VT), + CurDAG->getTargetConstant(MSB, dl, VT)}; return CurDAG->SelectNodeTo(N, Opc, VT, Ops); } @@ -1816,6 +1826,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { return Op; EVT VT = Op.getValueType(); + SDLoc dl(Op); unsigned BitWidth = VT.getSizeInBits(); unsigned UBFMOpc = BitWidth == 32 ? 
AArch64::UBFMWri : AArch64::UBFMXri; @@ -1823,16 +1834,16 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { if (ShlAmount > 0) { // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt ShiftNode = CurDAG->getMachineNode( - UBFMOpc, SDLoc(Op), VT, Op, - CurDAG->getTargetConstant(BitWidth - ShlAmount, VT), - CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT)); + UBFMOpc, dl, VT, Op, + CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), + CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); } else { // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 assert(ShlAmount < 0 && "expected right shift"); int ShrAmount = -ShlAmount; ShiftNode = CurDAG->getMachineNode( - UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT), - CurDAG->getTargetConstant(BitWidth - 1, VT)); + UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), + CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); } return SDValue(ShiftNode, 0); @@ -1872,7 +1883,7 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, return false; ShiftAmount = countTrailingZeros(NonZeroBits); - MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount); + MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); // BFI encompasses sufficiently many nodes that it's worth inserting an extra // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL @@ -1997,10 +2008,11 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { return nullptr; EVT VT = N->getValueType(0); + SDLoc dl(N); SDValue Ops[] = { Opd0, Opd1, - CurDAG->getTargetConstant(LSB, VT), - CurDAG->getTargetConstant(MSB, VT) }; + CurDAG->getTargetConstant(LSB, dl, VT), + CurDAG->getTargetConstant(MSB, dl, VT) }; return CurDAG->SelectNodeTo(N, Opc, VT, Ops); } @@ -2098,7 +2110,7 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, // finding FBits, but it must still be in range. if (FBits == 0 || FBits > RegWidth) return false; - FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32); + FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32); return true; } @@ -2213,8 +2225,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); const TargetLowering *TLI = getTargetLowering(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), - CurDAG->getTargetConstant(Shifter, MVT::i32) }; + SDLoc DL(Node); + SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), + CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); } case ISD::INTRINSIC_W_CHAIN: { @@ -2250,11 +2263,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { SDValue MemAddr = Node->getOperand(4); // Place arguments in the right order. - SmallVector<SDValue, 7> Ops; - Ops.push_back(ValLo); - Ops.push_back(ValHi); - Ops.push_back(MemAddr); - Ops.push_back(Chain); + SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); // Transfer memoperands. 
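The bitfield selectors in this file (isBitfieldExtractOpFromAnd, isBitfieldExtractOpFromShr, getLeftShift) all bottom out in UBFM immediates. A sketch of the three immediate recipes, matching the LSB/MSB and BitWidth arithmetic above and assuming the usual UBFM semantics, with immr as the rotate and imms as the top source bit:

#include <cstdio>

// Sketch of the UBFM immediates the selectors construct. Field extraction
// and plain shifts are all UBFM:
//   (x >> lsb) & ((1 << w) - 1)  ->  UBFM x, #lsb, #lsb+w-1
//   x << amt                     ->  UBFM x, #BW-amt, #BW-1-amt
//   x >> amt (logical)           ->  UBFM x, #amt, #BW-1
struct UBFMImms { unsigned immr, imms; };

static UBFMImms extractField(unsigned Lsb, unsigned Width) {
  return {Lsb, Lsb + Width - 1};     // the LSB/MSB pair in the selector
}
static UBFMImms leftShift(unsigned Amt, unsigned BW) {
  return {BW - Amt, BW - 1 - Amt};   // getLeftShift, ShlAmount > 0
}
static UBFMImms rightShift(unsigned Amt, unsigned BW) {
  return {Amt, BW - 1};              // getLeftShift, ShlAmount < 0
}

int main() {
  UBFMImms F = extractField(8, 8);   // (x >> 8) & 0xff
  UBFMImms L = leftShift(4, 32);     // lsl w0, w1, #4
  UBFMImms R = rightShift(4, 32);    // lsr w0, w1, #4
  printf("ubfx:  immr=%u imms=%u\n", F.immr, F.imms); // 8, 15
  printf("lsl#4: immr=%u imms=%u\n", L.immr, L.imms); // 28, 27
  printf("lsr#4: immr=%u imms=%u\n", R.immr, R.imms); // 4, 31
}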
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 6458d56..e6108c3 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" @@ -75,10 +76,9 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration( cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); - -AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM) - : TargetLowering(TM) { - Subtarget = &TM.getSubtarget<AArch64Subtarget>(); +AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, + const AArch64Subtarget &STI) + : TargetLowering(TM), Subtarget(&STI) { // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so // we have to make something up. Arbitrarily, choose ZeroOrOne. @@ -120,7 +120,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM) } // Compute derived properties from the register classes - computeRegisterProperties(); + computeRegisterProperties(Subtarget->getRegisterInfo()); // Provide all sorts of operation actions setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); @@ -282,14 +282,39 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); - // f16 is storage-only, so we promote operations to f32 if we know this is - // valid, and ignore them otherwise. The operations not mentioned here will - // fail to select, but this is not a major problem as no source language - // should be emitting native f16 operations yet. - setOperationAction(ISD::FADD, MVT::f16, Promote); - setOperationAction(ISD::FDIV, MVT::f16, Promote); - setOperationAction(ISD::FMUL, MVT::f16, Promote); - setOperationAction(ISD::FSUB, MVT::f16, Promote); + // f16 is a storage-only type, always promote it to f32. 
+ setOperationAction(ISD::SETCC, MVT::f16, Promote); + setOperationAction(ISD::BR_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, Promote); + setOperationAction(ISD::FSUB, MVT::f16, Promote); + setOperationAction(ISD::FMUL, MVT::f16, Promote); + setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::FMA, MVT::f16, Promote); + setOperationAction(ISD::FNEG, MVT::f16, Promote); + setOperationAction(ISD::FABS, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FSQRT, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); // v4f16 is also a storage-only type, so promote it to v4f32 when that is // known to be safe. @@ -371,9 +396,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::v8f16, Expand); // AArch64 has implementations of a lot of rounding-like FP operations. - static MVT RoundingTypes[] = { MVT::f32, MVT::f64}; - for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) { - MVT Ty = RoundingTypes[I]; + for (MVT Ty : {MVT::f32, MVT::f64}) { setOperationAction(ISD::FFLOOR, Ty, Legal); setOperationAction(ISD::FNEARBYINT, Ty, Legal); setOperationAction(ISD::FCEIL, Ty, Legal); @@ -469,6 +492,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM) setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::VSELECT); + setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); @@ -484,6 +508,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM) // Enable TBZ/TBNZ MaskAndBranchFoldingIsLegal = true; + EnableExtLdPromotion = true; setMinFunctionAlignment(2); @@ -534,11 +559,21 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote); + // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16 + // -> v8f16 conversions. + setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote); // Similarly, there is no direct i32 -> f64 vector conversion instruction. 
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); + // Or, direct i32 -> f16 vector conversion. Set it so custom, so the + // conversion happens in two steps: v4i32 -> v4f32 -> v4f16 + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); // AArch64 doesn't have MUL.2d: setOperationAction(ISD::MUL, MVT::v2i64, Expand); @@ -570,9 +605,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM) } // AArch64 has implementations of a lot of rounding-like FP operations. - static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 }; - for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) { - MVT Ty = RoundingVecTypes[I]; + for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) { setOperationAction(ISD::FFLOOR, Ty, Legal); setOperationAction(ISD::FNEARBYINT, Ty, Legal); setOperationAction(ISD::FCEIL, Ty, Legal); @@ -647,6 +680,12 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); + // [SU][MIN|MAX] are available for all NEON types apart from i64. + if (!VT.isFloatingPoint() && + VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64) + for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) + setOperationAction(Opcode, VT.getSimpleVT(), Legal); + if (Subtarget->isLittleEndian()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { @@ -739,13 +778,6 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i64; } -unsigned AArch64TargetLowering::getMaximalGlobalOffset() const { - // FIXME: On AArch64, this depends on the type. - // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). - // and the offset has to be a multiple of the related size in bytes. 
- return 4095; -} - FastISel * AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { @@ -753,9 +785,8 @@ AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, } const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: - return nullptr; + switch ((AArch64ISD::NodeType)Opcode) { + case AArch64ISD::FIRST_NUMBER: break; case AArch64ISD::CALL: return "AArch64ISD::CALL"; case AArch64ISD::ADRP: return "AArch64ISD::ADRP"; case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow"; @@ -827,6 +858,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz"; case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz"; case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz"; + case AArch64ISD::SADDV: return "AArch64ISD::SADDV"; + case AArch64ISD::UADDV: return "AArch64ISD::UADDV"; + case AArch64ISD::SMINV: return "AArch64ISD::SMINV"; + case AArch64ISD::UMINV: return "AArch64ISD::UMINV"; + case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV"; + case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV"; case AArch64ISD::NOT: return "AArch64ISD::NOT"; case AArch64ISD::BIT: return "AArch64ISD::BIT"; case AArch64ISD::CBZ: return "AArch64ISD::CBZ"; @@ -834,6 +871,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::TBZ: return "AArch64ISD::TBZ"; case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ"; case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; + case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH"; case AArch64ISD::SITOF: return "AArch64ISD::SITOF"; case AArch64ISD::UITOF: return "AArch64ISD::UITOF"; case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST"; @@ -869,6 +907,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::SMULL: return "AArch64ISD::SMULL"; case AArch64ISD::UMULL: return "AArch64ISD::UMULL"; } + return nullptr; } MachineBasicBlock * @@ -886,9 +925,8 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, // EndBB: // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB] - const TargetInstrInfo *TII = - getTargetMachine().getSubtargetImpl()->getInstrInfo(); MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); DebugLoc DL = MI->getDebugLoc(); MachineFunction::iterator It = MBB; @@ -1151,7 +1189,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, isLegalArithImmed(C - 1ULL))) { CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; - RHS = DAG.getConstant(C, VT); + RHS = DAG.getConstant(C, dl, VT); } break; case ISD::SETULT: @@ -1161,7 +1199,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) { CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; - RHS = DAG.getConstant(C, VT); + RHS = DAG.getConstant(C, dl, VT); } break; case ISD::SETLE: @@ -1172,7 +1210,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, isLegalArithImmed(C + 1ULL))) { CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; C = (VT == MVT::i32) ? 
(uint32_t)(C + 1) : C + 1; - RHS = DAG.getConstant(C, VT); + RHS = DAG.getConstant(C, dl, VT); } break; case ISD::SETULE: @@ -1183,7 +1221,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, isLegalArithImmed(C + 1ULL))) { CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; - RHS = DAG.getConstant(C, VT); + RHS = DAG.getConstant(C, dl, VT); } break; } @@ -1217,10 +1255,11 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, DAG.getValueType(MVT::i16)); Cmp = emitComparison(SExt, - DAG.getConstant(ValueofRHS, RHS.getValueType()), + DAG.getConstant(ValueofRHS, dl, + RHS.getValueType()), CC, dl, DAG); AArch64CC = changeIntCCToAArch64CC(CC); - AArch64cc = DAG.getConstant(AArch64CC, MVT::i32); + AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32); return Cmp; } } @@ -1228,7 +1267,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } Cmp = emitComparison(LHS, RHS, CC, dl, DAG); AArch64CC = changeIntCCToAArch64CC(CC); - AArch64cc = DAG.getConstant(AArch64CC, MVT::i32); + AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32); return Cmp; } @@ -1264,7 +1303,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { case ISD::SMULO: case ISD::UMULO: { CC = AArch64CC::NE; - bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false; + bool IsSigned = Op.getOpcode() == ISD::SMULO; if (Op.getValueType() == MVT::i32) { unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; // For a 32 bit multiply with overflow check we want the instruction @@ -1275,7 +1314,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS); SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul, - DAG.getConstant(0, MVT::i64)); + DAG.getConstant(0, DL, MVT::i64)); // On AArch64 the upper 32 bits are always zero extended for a 32 bit // operation. We need to clear out the upper 32 bits, because we used a // widening multiply that wrote all 64 bits. In the end this should be a @@ -1288,10 +1327,10 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { // check we have to arithmetic shift right the 32nd bit of the result by // 31 bits. Then we compare the result to the upper 32 bits. SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, DL, MVT::i64)); UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits); SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value, - DAG.getConstant(31, MVT::i64)); + DAG.getConstant(31, DL, MVT::i64)); // It is important that LowerBits is last, otherwise the arithmetic // shift will not be folded into the compare (SUBS). 
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32); @@ -1304,10 +1343,11 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { // pattern: // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32) SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, DL, MVT::i64)); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); Overflow = - DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), + DAG.getNode(AArch64ISD::SUBS, DL, VTs, + DAG.getConstant(0, DL, MVT::i64), UpperBits).getValue(1); } break; @@ -1318,7 +1358,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { if (IsSigned) { SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS); SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value, - DAG.getConstant(63, MVT::i64)); + DAG.getConstant(63, DL, MVT::i64)); // It is important that LowerBits is last, otherwise the arithmetic // shift will not be folded into the compare (SUBS). SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); @@ -1328,7 +1368,8 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); Overflow = - DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), + DAG.getNode(AArch64ISD::SUBS, DL, VTs, + DAG.getConstant(0, DL, MVT::i64), UpperBits).getValue(1); } break; @@ -1347,10 +1388,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, RTLIB::Libcall Call) const { - SmallVector<SDValue, 2> Ops; - for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) - Ops.push_back(Op.getOperand(i)); - + SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end()); return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false, SDLoc(Op)).first; } @@ -1405,7 +1443,7 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { FVal = Other; TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other, - DAG.getConstant(-1ULL, Other.getValueType())); + DAG.getConstant(-1ULL, dl, Other.getValueType())); return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal, CCVal, Cmp); @@ -1455,24 +1493,25 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) return SDValue(); + SDLoc dl(Op); AArch64CC::CondCode CC; // The actual operation that sets the overflow or carry flag. SDValue Value, Overflow; std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG); // We use 0 and 1 as false and true values. - SDValue TVal = DAG.getConstant(1, MVT::i32); - SDValue FVal = DAG.getConstant(0, MVT::i32); + SDValue TVal = DAG.getConstant(1, dl, MVT::i32); + SDValue FVal = DAG.getConstant(0, dl, MVT::i32); // We use an inverted condition, because the conditional select is inverted // too. This will allow it to be selected to a single instruction: // CSINC Wd, WZR, WZR, invert(cond). 
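The [SU]MULO lowering above checks 32-bit overflow with one widening multiply: the product is computed in 64 bits, and overflow holds exactly when the high half disagrees with the sign extension of the low half (the DAG compares the bits above bit 32 against the low word shifted right arithmetically by 31). LowerXALUO then materializes that flag as 0/1 with the inverted-condition CSINC. A plain C++ sketch of the overflow predicate:

#include <cstdint>
#include <cstdio>

// Sketch of the SMULO overflow test implemented with a widening multiply:
// overflow occurred iff the 64-bit product does not equal the sign
// extension of its low 32 bits.
static bool smulo32(int32_t A, int32_t B, int32_t &Res) {
  int64_t P = (int64_t)A * (int64_t)B;
  Res = (int32_t)P;
  int32_t Upper = (int32_t)(P >> 32); // top 32 bits of the product
  int32_t SignOfLow = Res >> 31;      // 0 or -1: expected top bits
  return Upper != SignOfLow;          // true -> overflow
}

int main() {
  int32_t R;
  printf("%d\n", smulo32(0x40000000, 2, R)); // 1: 2^31 overflows int32
  printf("%d\n", smulo32(-3, 7, R));         // 0: -21 fits
}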
- SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32); - Overflow = DAG.getNode(AArch64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal, + SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32); + Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal, CCVal, Overflow); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); - return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); + return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } // Prefetch operands are: @@ -1503,7 +1542,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { (Locality << 1) | // Cache level bits (unsigned)IsStream; // Stream bit return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), - DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1)); + DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1)); } SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, @@ -1567,6 +1606,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, if (Op.getOperand(0).getValueType().isVector()) return LowerVectorFP_TO_INT(Op, DAG); + // f16 conversions are promoted to f32. + if (Op.getOperand(0).getValueType() == MVT::f16) { + SDLoc dl(Op); + return DAG.getNode( + Op.getOpcode(), dl, Op.getValueType(), + DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0))); + } + if (Op.getOperand(0).getValueType() != MVT::f128) { // It's legal except when f128 is involved return Op; @@ -1578,10 +1625,7 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, else LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); - SmallVector<SDValue, 2> Ops; - for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) - Ops.push_back(Op.getOperand(i)); - + SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end()); return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false, SDLoc(Op)).first; } @@ -1600,7 +1644,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()), InVT.getVectorNumElements()); In = DAG.getNode(Op.getOpcode(), dl, CastVT, In); - return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0)); + return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl)); } if (VT.getSizeInBits() > InVT.getSizeInBits()) { @@ -1619,6 +1663,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, if (Op.getValueType().isVector()) return LowerVectorINT_TO_FP(Op, DAG); + // f16 conversions are promoted to f32. + if (Op.getValueType() == MVT::f16) { + SDLoc dl(Op); + return DAG.getNode( + ISD::FP_ROUND, dl, MVT::f16, + DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)), + DAG.getIntPtrConstant(0, dl)); + } + // i128 conversions are libcalls. 
if (Op.getOperand(0).getValueType() == MVT::i128) return SDValue(); @@ -1679,7 +1732,7 @@ static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) { Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op); return SDValue( DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op, - DAG.getTargetConstant(AArch64::hsub, MVT::i32)), + DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)), 0); } @@ -1753,6 +1806,7 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) { assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); EVT VT = N->getValueType(0); + SDLoc dl(N); unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; unsigned NumElts = VT.getVectorNumElements(); MVT TruncVT = MVT::getIntegerVT(EltSize); @@ -1762,9 +1816,9 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) { const APInt &CInt = C->getAPIntValue(); // Element types smaller than 32 bits are not legal, so use i32 elements. // The values are implicitly truncated so sext vs. zext doesn't matter. - Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); + Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::getVectorVT(TruncVT, NumElts), Ops); } @@ -2219,8 +2273,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, AArch64::X3, AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7 }; static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs); - unsigned FirstVariadicGPR = - CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs); + unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs); unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); int GPRIdx = 0; @@ -2237,7 +2290,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, MachinePointerInfo::getStack(i * 8), false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(8, getPointerTy())); + DAG.getConstant(8, DL, getPointerTy())); } } FuncInfo->setVarArgsGPRIndex(GPRIdx); @@ -2248,8 +2301,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7}; static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs); - unsigned FirstVariadicFPR = - CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs); + unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs); unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); int FPRIdx = 0; @@ -2267,7 +2319,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, MachinePointerInfo::getStack(i * 16), false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(16, getPointerTy())); + DAG.getConstant(16, DL, getPointerTy())); } } FuncInfo->setVarArgsFPRIndex(FPRIdx); @@ -2619,8 +2671,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Adjust the stack pointer for the new arguments... 
// These operations are automatically eliminated by the prolog/epilog pass if (!IsSibCall) - Chain = - DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, DL, + true), + DL); SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy()); @@ -2690,7 +2743,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } unsigned LocMemOffset = VA.getLocMemOffset(); int32_t Offset = LocMemOffset + BEAlign; - SDValue PtrOff = DAG.getIntPtrConstant(Offset); + SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); if (IsTailCall) { @@ -2705,7 +2758,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // clobbered. Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI); } else { - SDValue PtrOff = DAG.getIntPtrConstant(Offset); + SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); DstAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); DstInfo = MachinePointerInfo::getStack(LocMemOffset); @@ -2713,11 +2766,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, if (Outs[i].Flags.isByVal()) { SDValue SizeNode = - DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64); + DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64); SDValue Cpy = DAG.getMemcpy( Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(), - /*isVol = */ false, - /*AlwaysInline = */ false, DstInfo, MachinePointerInfo()); + /*isVol = */ false, /*AlwaysInline = */ false, + /*isTailCall = */ false, + DstInfo, MachinePointerInfo()); MemOpChains.push_back(Cpy); } else { @@ -2782,8 +2836,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // we've carefully laid out the parameters so that when sp is reset they'll be // in the correct location. if (IsTailCall && !IsSibCall) { - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag, DL); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), + DAG.getIntPtrConstant(0, DL, true), InFlag, DL); InFlag = Chain.getValue(1); } @@ -2795,7 +2849,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Each tail call may have to adjust the stack by a different amount, so // this information must travel along with the operation for eventual // consumption by emitEpilogue. - Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32)); + Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32)); } // Add argument registers to the end of the list so that they are known live @@ -2806,19 +2860,16 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. 
const uint32_t *Mask; - const TargetRegisterInfo *TRI = - getTargetMachine().getSubtargetImpl()->getRegisterInfo(); - const AArch64RegisterInfo *ARI = - static_cast<const AArch64RegisterInfo *>(TRI); + const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); if (IsThisReturn) { // For 'this' returns, use the X0-preserving mask if applicable - Mask = ARI->getThisReturnPreservedMask(CallConv); + Mask = TRI->getThisReturnPreservedMask(MF, CallConv); if (!Mask) { IsThisReturn = false; - Mask = ARI->getCallPreservedMask(CallConv); + Mask = TRI->getCallPreservedMask(MF, CallConv); } } else - Mask = ARI->getCallPreservedMask(CallConv); + Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -2830,8 +2881,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // If we're doing a tail call, use a TC_RETURN here rather than an // actual call instruction. - if (IsTailCall) + if (IsTailCall) { + MF.getFrameInfo()->setHasTailCall(); return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops); + } // Returns a chain and a flag for retval copy to use. Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops); @@ -2841,8 +2894,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, ? RoundUpToAlignment(NumBytes, 16) : 0; - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(CalleePopBytes, true), + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), + DAG.getIntPtrConstant(CalleePopBytes, DL, true), InFlag, DL); if (!Ins.empty()) InFlag = Chain.getValue(1); @@ -2958,7 +3011,7 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, /*isInvariant=*/ true, 8); if (GN->getOffset() != 0) return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr, - DAG.getConstant(GN->getOffset(), PtrVT)); + DAG.getConstant(GN->getOffset(), DL, PtrVT)); return GlobalAddr; } @@ -3038,11 +3091,8 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, // TLS calls preserve all registers except those that absolutely must be // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be // silly). - const TargetRegisterInfo *TRI = - getTargetMachine().getSubtargetImpl()->getRegisterInfo(); - const AArch64RegisterInfo *ARI = - static_cast<const AArch64RegisterInfo *>(TRI); - const uint32_t *Mask = ARI->getTLSCallPreservedMask(); + const uint32_t *Mask = + Subtarget->getRegisterInfo()->getTLSCallPreservedMask(); // Finally, we can make the call.
This is just a degenerate version of a // normal AArch64 call node: x0 takes the address of the descriptor, and @@ -3125,11 +3175,13 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, SDValue TPWithOff_lo = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase, - HiVar, DAG.getTargetConstant(0, MVT::i32)), + HiVar, + DAG.getTargetConstant(0, DL, MVT::i32)), 0); SDValue TPWithOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo, - LoVar, DAG.getTargetConstant(0, MVT::i32)), + LoVar, + DAG.getTargetConstant(0, DL, MVT::i32)), 0); return TPWithOff; } else if (Model == TLSModel::InitialExec) { @@ -3165,10 +3217,10 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar, - DAG.getTargetConstant(0, MVT::i32)), + DAG.getTargetConstant(0, DL, MVT::i32)), 0); TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), + DAG.getTargetConstant(0, DL, MVT::i32)), 0); } else if (Model == TLSModel::GeneralDynamic) { // The call needs a relocation too for linker relaxation. It doesn't make @@ -3211,7 +3263,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!RHS.getNode()) { - RHS = DAG.getConstant(0, LHS.getValueType()); + RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; } } @@ -3236,10 +3288,10 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { if (CC == ISD::SETNE) OFCC = getInvertedCondCode(OFCC); - SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); + SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32); - return DAG.getNode(AArch64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest, - CCVal, Overflow); + return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, + Overflow); } if (LHS.getValueType().isInteger()) { @@ -3261,7 +3313,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Test = LHS.getOperand(0); uint64_t Mask = LHS.getConstantOperandVal(1); return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); + DAG.getConstant(Log2_64(Mask), dl, MVT::i64), + Dest); } return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest); @@ -3276,7 +3329,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Test = LHS.getOperand(0); uint64_t Mask = LHS.getConstantOperandVal(1); return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); + DAG.getConstant(Log2_64(Mask), dl, MVT::i64), + Dest); } return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest); @@ -3286,7 +3340,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // becomes redundant. This would also increase register pressure. uint64_t Mask = LHS.getValueType().getSizeInBits() - 1; return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS, - DAG.getConstant(Mask, MVT::i64), Dest); + DAG.getConstant(Mask, dl, MVT::i64), Dest); } } if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT && @@ -3296,7 +3350,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // becomes redundant. 
This would also increase register pressure. uint64_t Mask = LHS.getValueType().getSizeInBits() - 1; return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS, - DAG.getConstant(Mask, MVT::i64), Dest); + DAG.getConstant(Mask, dl, MVT::i64), Dest); } SDValue CCVal; @@ -3312,11 +3366,11 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); AArch64CC::CondCode CC1, CC2; changeFPCCToAArch64CC(CC, CC1, CC2); - SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); + SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); SDValue BR1 = DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp); if (CC2 != AArch64CC::AL) { - SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); + SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val, Cmp); } @@ -3336,7 +3390,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, if (SrcVT == MVT::f32 && VT == MVT::f64) In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2); else if (SrcVT == MVT::f64 && VT == MVT::f32) - In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0)); + In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, + DAG.getIntPtrConstant(0, DL)); else // FIXME: Src type is different, bail out for now. Can VT really be a // vector type? @@ -3345,11 +3400,12 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, EVT VecVT; EVT EltVT; - SDValue EltMask, VecVal1, VecVal2; + uint64_t EltMask; + SDValue VecVal1, VecVal2; if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) { EltVT = MVT::i32; VecVT = MVT::v4i32; - EltMask = DAG.getConstant(0x80000000ULL, EltVT); + EltMask = 0x80000000ULL; if (!VT.isVector()) { VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT, @@ -3367,7 +3423,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, // We want to materialize a mask with the high bit set, but the AdvSIMD // immediate moves cannot materialize that in a single instruction for // 64-bit elements. Instead, materialize zero and then negate it. - EltMask = DAG.getConstant(0, EltVT); + EltMask = 0; if (!VT.isVector()) { VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT, @@ -3382,11 +3438,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, llvm_unreachable("Invalid type for copysign!"); } - std::vector<SDValue> BuildVectorOps; - for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) - BuildVectorOps.push_back(EltMask); - - SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps); + SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT); // If we couldn't materialize the mask above, then the mask vector will be // the zero vector, and we need to negate it here.
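The last hunk above is a small cleanup beyond the SDLoc migration: EltMask becomes a plain integer, and the hand-rolled BUILD_VECTOR of identical lanes is replaced by a single getConstant call, which splats a scalar immediate across a vector type by itself. A condensed before/after, reusing the names from the hunk:

    // Old shape: one identical SDValue per lane, then an explicit BUILD_VECTOR.
    std::vector<SDValue> BuildVectorOps(VecVT.getVectorNumElements(),
                                        DAG.getConstant(0x80000000ULL, DL, EltVT));
    SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps);
    // New shape: getConstant with a vector VT performs the splat itself.
    SDValue BuildVec2 = DAG.getConstant(0x80000000ULL, DL, VecVT);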
@@ -3408,8 +3460,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, } SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { - if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::NoImplicitFloat)) + if (DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::NoImplicitFloat)) return SDValue(); if (!Subtarget->hasNEON()) @@ -3426,21 +3478,15 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { SDValue Val = Op.getOperand(0); SDLoc DL(Op); EVT VT = Op.getValueType(); - SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8); - SDValue VecVal; - if (VT == MVT::i32) { - VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); - VecVal = DAG.getTargetInsertSubreg(AArch64::ssub, DL, MVT::v8i8, ZeroVec, - VecVal); - } else { - VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val); - } + if (VT == MVT::i32) + Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val); + Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val); - SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal); + SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val); SDValue UaddLV = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, - DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, MVT::i32), CtPop); + DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop); if (VT == MVT::i64) UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV); @@ -3459,8 +3505,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // We chose ZeroOrOneBooleanContents, so use zero and one. EVT VT = Op.getValueType(); - SDValue TVal = DAG.getConstant(1, VT); - SDValue FVal = DAG.getConstant(0, VT); + SDValue TVal = DAG.getConstant(1, dl, VT); + SDValue FVal = DAG.getConstant(0, dl, VT); // Handle f128 first, since one possible outcome is a normal integer // comparison which gets picked up by the next if statement. @@ -3497,7 +3543,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { changeFPCCToAArch64CC(CC, CC1, CC2); if (CC2 == AArch64CC::AL) { changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2); - SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); + SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); // Note that we inverted the condition above, so we reverse the order of // the true and false operands here. This will allow the setcc to be @@ -3510,11 +3556,11 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // of the first as the RHS. We're effectively OR'ing the two CC's together. // FIXME: It would be nice if we could match the two CSELs to two CSINCs. - SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); + SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); - SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); + SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } } @@ -3529,7 +3575,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { /// operations would *not* be semantically equivalent. 
static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { if (Cmp == Result) - return true; + return (Cmp.getValueType() == MVT::f32 || + Cmp.getValueType() == MVT::f64); ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp); ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result); @@ -3544,49 +3591,10 @@ static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp; } -SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, - SelectionDAG &DAG) const { - SDValue CC = Op->getOperand(0); - SDValue TVal = Op->getOperand(1); - SDValue FVal = Op->getOperand(2); - SDLoc DL(Op); - - unsigned Opc = CC.getOpcode(); - // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select - // instruction. - if (CC.getResNo() == 1 && - (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { - // Only lower legal XALUO ops. - if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0))) - return SDValue(); - - AArch64CC::CondCode OFCC; - SDValue Value, Overflow; - std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG); - SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); - - return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, - CCVal, Overflow); - } - - if (CC.getOpcode() == ISD::SETCC) - return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal, - cast<CondCodeSDNode>(CC.getOperand(2))->get()); - else - return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal, - FVal, ISD::SETNE); -} - -SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, +SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, + SDValue RHS, SDValue TVal, + SDValue FVal, SDLoc dl, SelectionDAG &DAG) const { - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue TVal = Op.getOperand(2); - SDValue FVal = Op.getOperand(3); - SDLoc dl(Op); - // Handle f128 first, because it will result in a comparison of some RTLIB // call result against zero. if (LHS.getValueType() == MVT::f128) { @@ -3595,7 +3603,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!RHS.getNode()) { - RHS = DAG.getConstant(0, LHS.getValueType()); + RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; } } @@ -3694,67 +3702,27 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SDValue CCVal; SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - EVT VT = Op.getValueType(); + EVT VT = TVal.getValueType(); return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp); } // Now we know we're dealing with FP values. assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); assert(LHS.getValueType() == RHS.getValueType()); - EVT VT = Op.getValueType(); - - // Try to match this select into a max/min operation, which have dedicated - // opcode in the instruction set. - // FIXME: This is not correct in the presence of NaNs, so we only enable this - // in no-NaNs mode. 
- if (getTargetMachine().Options.NoNaNsFPMath) { - SDValue MinMaxLHS = TVal, MinMaxRHS = FVal; - if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) && - selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) { - CC = ISD::getSetCCSwappedOperands(CC); - std::swap(MinMaxLHS, MinMaxRHS); - } - - if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) && - selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) { - switch (CC) { - default: - break; - case ISD::SETGT: - case ISD::SETGE: - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETOGT: - case ISD::SETOGE: - return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS); - break; - case ISD::SETLT: - case ISD::SETLE: - case ISD::SETULT: - case ISD::SETULE: - case ISD::SETOLT: - case ISD::SETOLE: - return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS); - break; - } - } - } - - // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead - // and do the comparison. + EVT VT = TVal.getValueType(); SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally // clean. Some of them require two CSELs to implement. AArch64CC::CondCode CC1, CC2; changeFPCCToAArch64CC(CC, CC1, CC2); - SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); + SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); // If we need a second CSEL, emit it, using the output of the first as the // RHS. We're effectively OR'ing the two CC's together. if (CC2 != AArch64CC::AL) { - SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); + SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } @@ -3762,6 +3730,58 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, return CS1; } +SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue TVal = Op.getOperand(2); + SDValue FVal = Op.getOperand(3); + SDLoc DL(Op); + return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); +} + +SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, + SelectionDAG &DAG) const { + SDValue CCVal = Op->getOperand(0); + SDValue TVal = Op->getOperand(1); + SDValue FVal = Op->getOperand(2); + SDLoc DL(Op); + + unsigned Opc = CCVal.getOpcode(); + // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select + // instruction. + if (CCVal.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { + // Only lower legal XALUO ops. + if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0))) + return SDValue(); + + AArch64CC::CondCode OFCC; + SDValue Value, Overflow; + std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG); + SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32); + + return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, + CCVal, Overflow); + } + + // Lower it the same way as we would lower a SELECT_CC node. 
+ ISD::CondCode CC; + SDValue LHS, RHS; + if (CCVal.getOpcode() == ISD::SETCC) { + LHS = CCVal.getOperand(0); + RHS = CCVal.getOperand(1); + CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get(); + } else { + LHS = CCVal; + RHS = DAG.getConstant(0, DL, CCVal.getValueType()); + CC = ISD::SETNE; + } + return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); +} + SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // Jump table entries as PC relative offsets. No additional tweaking @@ -3892,11 +3912,11 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SDValue GRTop, GRTopAddr; GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(8, getPointerTy())); + DAG.getConstant(8, DL, getPointerTy())); GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy()); GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop, - DAG.getConstant(GPRSize, getPointerTy())); + DAG.getConstant(GPRSize, DL, getPointerTy())); MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, MachinePointerInfo(SV, 8), false, false, 8)); @@ -3907,11 +3927,11 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, if (FPRSize > 0) { SDValue VRTop, VRTopAddr; VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(16, getPointerTy())); + DAG.getConstant(16, DL, getPointerTy())); VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy()); VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop, - DAG.getConstant(FPRSize, getPointerTy())); + DAG.getConstant(FPRSize, DL, getPointerTy())); MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, MachinePointerInfo(SV, 16), false, false, 8)); @@ -3919,15 +3939,17 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, // int __gr_offs at offset 24 SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(24, getPointerTy())); - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32), + DAG.getConstant(24, DL, getPointerTy())); + MemOps.push_back(DAG.getStore(Chain, DL, + DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr, MachinePointerInfo(SV, 24), false, false, 4)); // int __vr_offs at offset 28 SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(28, getPointerTy())); - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32), + DAG.getConstant(28, DL, getPointerTy())); + MemOps.push_back(DAG.getStore(Chain, DL, + DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr, MachinePointerInfo(SV, 28), false, false, 4)); @@ -3944,13 +3966,15 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single // pointer. + SDLoc DL(Op); unsigned VaListSize = Subtarget->isTargetDarwin() ? 
8 : 32; const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1), - Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32), - 8, false, false, MachinePointerInfo(DestSV), + return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), + Op.getOperand(2), + DAG.getConstant(VaListSize, DL, MVT::i32), + 8, false, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); } @@ -3972,9 +3996,9 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { if (Align > 8) { assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2"); VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(Align - 1, getPointerTy())); + DAG.getConstant(Align - 1, DL, getPointerTy())); VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList, - DAG.getConstant(-(int64_t)Align, getPointerTy())); + DAG.getConstant(-(int64_t)Align, DL, getPointerTy())); } Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); @@ -3994,7 +4018,7 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // Increment the pointer, VAList, to the next vaarg SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(ArgSize, getPointerTy())); + DAG.getConstant(ArgSize, DL, getPointerTy())); // Store the incremented VAList to the legalized pointer SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V), false, false, 0); @@ -4006,7 +4030,7 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(), false, false, false, 0); // Round the value down to an f32. SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0), - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, DL)); SDValue Ops[] = { NarrowFP, WideFP.getValue(1) }; // Merge the rounded value with the chain output of the load. 
return DAG.getMergeValues(Ops, DL); @@ -4055,7 +4079,7 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); - SDValue Offset = DAG.getConstant(8, getPointerTy()); + SDValue Offset = DAG.getConstant(8, DL, getPointerTy()); return DAG.getLoad(VT, DL, DAG.getEntryNode(), DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo(), false, false, false, 0); @@ -4083,15 +4107,15 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, MVT::i64), ShAmt); + DAG.getConstant(VTBits, dl, MVT::i64), ShAmt); SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, MVT::i64)); + DAG.getConstant(VTBits, dl, MVT::i64)); SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), + SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE, dl, DAG); - SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32); + SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); @@ -4103,8 +4127,9 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue TrueValHi = Opc == ISD::SRA ? DAG.getNode(Opc, dl, VT, ShOpHi, - DAG.getConstant(VTBits - 1, MVT::i64)) - : DAG.getConstant(0, VT); + DAG.getConstant(VTBits - 1, dl, + MVT::i64)) + : DAG.getConstant(0, dl, VT); SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp); @@ -4127,24 +4152,24 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, assert(Op.getOpcode() == ISD::SHL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, MVT::i64), ShAmt); + DAG.getConstant(VTBits, dl, MVT::i64), ShAmt); SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, MVT::i64)); + DAG.getConstant(VTBits, dl, MVT::i64)); SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), + SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE, dl, DAG); - SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32); + SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp); // AArch64 shifts of larger than register sizes are wrapped rather than // clamped, so we can't just emit "lo << a" if a is too big. 
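Concretely: the 128-bit value is split into 64-bit halves (lo, hi), and a left shift by a must yield hi = lo << (a - 64) and lo = 0 once a >= 64. Because the ISA reduces shift amounts modulo the register width rather than clamping them, the lowering computes both regimes and selects with CSEL on ExtraShAmt = a - 64 >= 0. Two illustrative cases (values not from the patch):

    a = 1:  ExtraShAmt = -63 < 0  ->  hi = (hi << 1) | (lo >> 63), lo = lo << 1
    a = 65: ExtraShAmt =   1 >= 0 ->  hi = lo << 1,                lo = 0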
- SDValue TrueValLo = DAG.getConstant(0, VT); + SDValue TrueValLo = DAG.getConstant(0, dl, VT); SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); @@ -4258,7 +4283,8 @@ AArch64TargetLowering::getSingleConstraintMatchWeight( std::pair<unsigned, const TargetRegisterClass *> AArch64TargetLowering::getRegForInlineAsmConstraint( - const std::string &Constraint, MVT VT) const { + const TargetRegisterInfo *TRI, const std::string &Constraint, + MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': @@ -4287,7 +4313,7 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. std::pair<unsigned, const TargetRegisterClass *> Res; - Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); // Not found as a standard register? if (!Res.second) { @@ -4436,7 +4462,7 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint( } // All assembler immediates are 64-bit integers. - Result = DAG.getTargetConstant(CVal, MVT::i64); + Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64); break; } @@ -4462,7 +4488,7 @@ static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) { SDLoc DL(V64Reg); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy), - V64Reg, DAG.getConstant(0, MVT::i32)); + V64Reg, DAG.getConstant(0, DL, MVT::i32)); } /// getExtFactor - Determine the adjustment factor for the position when @@ -4594,25 +4620,26 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, // The extraction can just take the second half Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, - DAG.getConstant(NumSrcElts, MVT::i64)); + DAG.getConstant(NumSrcElts, dl, MVT::i64)); Src.WindowBase = -NumSrcElts; } else if (Src.MaxElt < NumSrcElts) { // The extraction can just take the first half Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, - DAG.getConstant(0, MVT::i64)); + DAG.getConstant(0, dl, MVT::i64)); } else { // An actual VEXT is needed SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, - DAG.getConstant(0, MVT::i64)); + DAG.getConstant(0, dl, MVT::i64)); SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, - DAG.getConstant(NumSrcElts, MVT::i64)); + DAG.getConstant(NumSrcElts, dl, MVT::i64)); unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1); Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1, - VEXTSrc2, DAG.getConstant(Imm, MVT::i32)); + VEXTSrc2, + DAG.getConstant(Imm, dl, MVT::i32)); Src.WindowBase = -Src.MinElt; } } @@ -4947,11 +4974,11 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) { VT.getVectorNumElements() / 2); if (SplitV0) { V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0, - DAG.getConstant(0, MVT::i64)); + DAG.getConstant(0, DL, MVT::i64)); } if (V1.getValueType().getSizeInBits() == 128) { V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1, - DAG.getConstant(0, MVT::i64)); + DAG.getConstant(0, DL, MVT::i64)); } return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1); } @@ -5018,7 +5045,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, unsigned Opcode; if (EltTy == MVT::i8) Opcode = AArch64ISD::DUPLANE8; - else if (EltTy == MVT::i16) + 
else if (EltTy == MVT::i16 || EltTy == MVT::f16) Opcode = AArch64ISD::DUPLANE16; else if (EltTy == MVT::i32 || EltTy == MVT::f32) Opcode = AArch64ISD::DUPLANE32; @@ -5029,7 +5056,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, if (VT.getSizeInBits() == 64) OpLHS = WidenVector(OpLHS, DAG); - SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, MVT::i64); + SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64); return DAG.getNode(Opcode, dl, VT, OpLHS, Lane); } case OP_VEXT1: @@ -5037,7 +5064,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, case OP_VEXT3: { unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS); return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS, - DAG.getConstant(Imm, MVT::i32)); + DAG.getConstant(Imm, dl, MVT::i32)); } case OP_VUZPL: return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS, @@ -5074,7 +5101,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask, for (int Val : ShuffleMask) { for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { unsigned Offset = Byte + Val * BytesPerElt; - TBLMask.push_back(DAG.getConstant(Offset, MVT::i32)); + TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32)); } } @@ -5094,7 +5121,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask, V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst); Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst, + DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst, DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, makeArrayRef(TBLMask.data(), IndexLen))); } else { @@ -5102,7 +5129,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask, V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst); Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst, + DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst, DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, makeArrayRef(TBLMask.data(), IndexLen))); } else { @@ -5114,7 +5141,8 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask, // &TBLMask[0], IndexLen)); Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::aarch64_neon_tbl2, MVT::i32), V1Cst, V2Cst, + DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), + V1Cst, V2Cst, DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, makeArrayRef(TBLMask.data(), IndexLen))); } @@ -5183,7 +5211,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, } else if (VT.getSizeInBits() == 64) V1 = WidenVector(V1, DAG); - return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, MVT::i64)); + return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64)); } if (isREVMask(ShuffleMask, VT, 64)) @@ -5200,12 +5228,12 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, std::swap(V1, V2); Imm *= getExtFactor(V1); return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2, - DAG.getConstant(Imm, MVT::i32)); + DAG.getConstant(Imm, dl, MVT::i32)); } else if (V2->getOpcode() == ISD::UNDEF && isSingletonEXTMask(ShuffleMask, VT, Imm)) { Imm *= getExtFactor(V1); return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1, - DAG.getConstant(Imm, MVT::i32)); + DAG.getConstant(Imm, dl, MVT::i32)); } unsigned WhichResult; @@ -5244,7 +5272,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, int NumInputElements = 
V1.getValueType().getVectorNumElements(); if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) { SDValue DstVec = DstIsLeft ? V1 : V2; - SDValue DstLaneV = DAG.getConstant(Anomaly, MVT::i64); + SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64); SDValue SrcVec = V1; int SrcLane = ShuffleMask[Anomaly]; @@ -5252,7 +5280,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SrcVec = V2; SrcLane -= VT.getVectorNumElements(); } - SDValue SrcLaneV = DAG.getConstant(SrcLane, MVT::i64); + SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64); EVT ScalarVT = VT.getVectorElementType(); @@ -5342,8 +5370,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5351,8 +5379,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(8, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5360,8 +5388,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(16, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5369,8 +5397,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(24, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5378,8 +5406,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5387,8 +5415,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(8, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } @@ -5492,7 +5520,8 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { IsShiftRight ? 
Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli; SDValue ResultSLI = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1)); + DAG.getConstant(Intrin, DL, MVT::i32), X, Y, + Shift.getOperand(1)); DEBUG(dbgs() << "aarch64-lower: transformed: \n"); DEBUG(N->dump(&DAG)); @@ -5542,8 +5571,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5551,8 +5580,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(8, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5560,8 +5589,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(16, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5569,8 +5598,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(24, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5578,8 +5607,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5587,8 +5616,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(8, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } @@ -5623,7 +5652,7 @@ static SDValue NormalizeBuildVector(SDValue Op, if (Lane.getOpcode() == ISD::Constant) { APInt LowBits(EltTy.getSizeInBits(), cast<ConstantSDNode>(Lane)->getZExtValue()); - Lane = DAG.getConstant(LowBits.getZExtValue(), MVT::i32); + Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32); } Ops.push_back(Lane); } @@ -5661,13 +5690,13 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal); if (VT.getSizeInBits() == 128) { SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64, - DAG.getConstant(CnstVal, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } // Support the V64 version via subregister insertion. SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64, - DAG.getConstant(CnstVal, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5675,8 +5704,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5684,8 +5713,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(8, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5693,8 +5722,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(16, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5702,8 +5731,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(24, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5711,8 +5740,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5720,8 +5749,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(8, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5729,8 +5758,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(264, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(264, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5738,8 +5767,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(272, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(272, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5747,7 +5776,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8; SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5756,7 +5785,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32; SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5764,7 +5793,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, VT.getSizeInBits() == 128) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal); SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64, - DAG.getConstant(CnstVal, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5774,8 +5803,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5783,8 +5812,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(8, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5792,8 +5821,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(16, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5801,8 +5830,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(24, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5810,8 +5839,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5819,8 +5848,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(8, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5828,8 +5857,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(264, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(264, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -5837,8 +5866,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(272, MVT::i32)); + DAG.getConstant(CnstVal, dl, MVT::i32), + DAG.getConstant(272, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } @@ -5921,8 +5950,10 @@ FailedModImm: if (VT.getVectorElementType().isFloatingPoint()) { SmallVector<SDValue, 8> Ops; - MVT NewType = - (VT.getVectorElementType() == MVT::f32) ? 
MVT::i32 : MVT::i64; + EVT EltTy = VT.getVectorElementType(); + assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) && + "Unsupported floating-point vector type"); + MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits()); for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts); @@ -5942,7 +5973,7 @@ FailedModImm: // Now insert the non-constant lanes. for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); - SDValue LaneIdx = DAG.getConstant(i, MVT::i64); + SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64); if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) { // Note that type legalization likely mucked about with the VT of the // source operand, so we may have to convert it here before inserting. @@ -5984,7 +6015,7 @@ FailedModImm: unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub; MachineSDNode *N = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0, - DAG.getTargetConstant(SubIdx, MVT::i32)); + DAG.getTargetConstant(SubIdx, dl, MVT::i32)); Vec = SDValue(N, 0); ++i; } @@ -5992,7 +6023,7 @@ FailedModImm: SDValue V = Op.getOperand(i); if (V.getOpcode() == ISD::UNDEF) continue; - SDValue LaneIdx = DAG.getConstant(i, MVT::i64); + SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64); Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); } return Vec; @@ -6215,10 +6246,11 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, case ISD::SHL: if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) - return DAG.getNode(AArch64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0), - DAG.getConstant(Cnt, MVT::i32)); + return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0), + DAG.getConstant(Cnt, DL, MVT::i32)); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::aarch64_neon_ushl, MVT::i32), + DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL, + MVT::i32), Op.getOperand(0), Op.getOperand(1)); case ISD::SRA: case ISD::SRL: @@ -6227,8 +6259,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, Cnt < EltSize) { unsigned Opc = (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR; - return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0), - DAG.getConstant(Cnt, MVT::i32)); + return DAG.getNode(Opc, DL, VT, Op.getOperand(0), + DAG.getConstant(Cnt, DL, MVT::i32)); } // Right shift register. Note, there is not a shift right register @@ -6240,7 +6272,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1)); SDValue NegShiftLeft = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift); + DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0), + NegShift); return NegShiftLeft; } @@ -6521,6 +6554,34 @@ bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { return NumBits1 > NumBits2; } +/// Check if it is profitable to hoist instruction in then/else to if. +/// Not profitable if I and its user can form a FMA instruction +/// because we prefer FMSUB/FMADD.
+bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const { + if (I->getOpcode() != Instruction::FMul) + return true; + + if (I->getNumUses() != 1) + return true; + + Instruction *User = I->user_back(); + + if (User && + !(User->getOpcode() == Instruction::FSub || + User->getOpcode() == Instruction::FAdd)) + return true; + + const TargetOptions &Options = getTargetMachine().Options; + EVT VT = getValueType(User->getOperand(0)->getType()); + + if (isFMAFasterThanFMulAndFAdd(VT) && + isOperationLegalOrCustom(ISD::FMA, VT) && + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath)) + return false; + + return true; +} + // All 32-bit GPR operations implicitly zero the high-half of the corresponding // 64-bit GPR. bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { @@ -6553,6 +6614,59 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { VT1.getSizeInBits() <= 32); } +bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { + if (isa<FPExtInst>(Ext)) + return false; + + // Vector types are not free. + if (Ext->getType()->isVectorTy()) + return false; + + for (const Use &U : Ext->uses()) { + // The extension is free if we can fold it with a left shift in an + // addressing mode or an arithmetic operation: add, sub, and cmp. + + // Is there a shift? + const Instruction *Instr = cast<Instruction>(U.getUser()); + + // Is this a constant shift? + switch (Instr->getOpcode()) { + case Instruction::Shl: + if (!isa<ConstantInt>(Instr->getOperand(1))) + return false; + break; + case Instruction::GetElementPtr: { + gep_type_iterator GTI = gep_type_begin(Instr); + std::advance(GTI, U.getOperandNo()); + Type *IdxTy = *GTI; + // This extension will end up with a shift because of the scaling factor. + // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0. + // Get the shift amount based on the scaling factor: + // log2(sizeof(IdxTy)) - log2(8). + uint64_t ShiftAmt = + countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3; + // Is the constant foldable in the shift of the addressing mode? + // I.e., shift amount is between 1 and 4 inclusive. + if (ShiftAmt == 0 || ShiftAmt > 4) + return false; + break; + } + case Instruction::Trunc: + // Check if this is a noop. + // trunc(sext ty1 to ty2) to ty1. + if (Instr->getType() == Ext->getOperand(0)->getType()) + continue; + // FALL THROUGH. + default: + return false; + } + + // At this point we can use the bfm family, so this extension is free + // for that use. + } + return true; +} + bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType, unsigned &RequiredAligment) const { if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy()) @@ -6591,13 +6705,22 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, bool Fast; const Function *F = MF.getFunction(); if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 && - !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoImplicitFloat) && + !F->hasFnAttribute(Attribute::NoImplicitFloat) && (memOpAlign(SrcAlign, DstAlign, 16) || (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast))) return MVT::f128; - return Size >= 8 ?
MVT::i64 : MVT::i32; + if (Size >= 8 && + (memOpAlign(SrcAlign, DstAlign, 8) || + (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast))) + return MVT::i64; + + if (Size >= 4 && + (memOpAlign(SrcAlign, DstAlign, 4) || + (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast))) + return MVT::i32; + + return MVT::Other; } // 12-bit optionally shifted immediates are legal for adds. @@ -6748,7 +6871,7 @@ bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, unsigned LZ = countLeadingZeros((uint64_t)Val); unsigned Shift = (63 - LZ) / 16; // MOVZ is free so return true for one or fewer MOVK. - return (Shift < 3) ? true : false; + return Shift < 3; } // Generate SUBS and CSEL for integer abs. @@ -6766,14 +6889,14 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0)) if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1))) if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) { - SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), + SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0.getOperand(0)); // Generate SUBS & CSEL. SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), - N0.getOperand(0), DAG.getConstant(0, VT)); + N0.getOperand(0), DAG.getConstant(0, DL, VT)); return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg, - DAG.getConstant(AArch64CC::PL, MVT::i32), + DAG.getConstant(AArch64CC::PL, DL, MVT::i32), SDValue(Cmp.getNode(), 1)); } return SDValue(); @@ -6802,8 +6925,8 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SDLoc DL(N); SDValue N0 = N->getOperand(0); unsigned Lg2 = Divisor.countTrailingZeros(); - SDValue Zero = DAG.getConstant(0, VT); - SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, VT); + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT); // Add (N0 < 0) ? Pow2 - 1 : 0; SDValue CCVal; @@ -6819,7 +6942,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, // Divide by pow2. SDValue SRA = - DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, MVT::i64)); + DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64)); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. 
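As a concrete model of the BuildSDIVPow2 sequence above (bias via CSEL after SUBS, arithmetic shift, negate for a negative divisor), here is a minimal scalar C++ sketch of the transform's semantics; the helper name is hypothetical and this is not LLVM API:

#include <cassert>
#include <cstdint>

// n / 2^lg2 with C-style truncation toward zero, built the way the combine
// builds it: add (2^lg2 - 1) to negative dividends (the CSEL after SUBS),
// shift right arithmetically (the SRA), and subtract from zero when the
// original divisor was negative (the final SUB).
int64_t sdivPow2(int64_t n, unsigned lg2, bool negativeDivisor) {
  int64_t biased = n < 0 ? n + ((int64_t{1} << lg2) - 1) : n;
  int64_t quotient = biased >> lg2;
  return negativeDivisor ? -quotient : quotient;
}

int main() {
  assert(sdivPow2(-7, 2, false) == -7 / 4); // -1, not -2: truncation toward zero
  assert(sdivPow2(9, 3, true) == 9 / -8);
  return 0;
}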
@@ -6828,7 +6951,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, if (Created) Created->push_back(SRA.getNode()); - return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), SRA); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); } static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, @@ -6845,45 +6968,46 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { APInt Value = C->getAPIntValue(); EVT VT = N->getValueType(0); + SDLoc DL(N); if (Value.isNonNegative()) { // (mul x, 2^N + 1) => (add (shl x, N), x) APInt VM1 = Value - 1; if (VM1.isPowerOf2()) { SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VM1.logBase2(), MVT::i64)); - return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(VM1.logBase2(), DL, MVT::i64)); + return DAG.getNode(ISD::ADD, DL, VT, ShiftedVal, N->getOperand(0)); } // (mul x, 2^N - 1) => (sub (shl x, N), x) APInt VP1 = Value + 1; if (VP1.isPowerOf2()) { SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VP1.logBase2(), MVT::i64)); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(VP1.logBase2(), DL, MVT::i64)); + return DAG.getNode(ISD::SUB, DL, VT, ShiftedVal, N->getOperand(0)); } } else { - // (mul x, -(2^N + 1)) => - (add (shl x, N), x) - APInt VNM1 = -Value - 1; - if (VNM1.isPowerOf2()) { - SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VNM1.logBase2(), MVT::i64)); - SDValue Add = - DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add); - } // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) APInt VNP1 = -Value + 1; if (VNP1.isPowerOf2()) { SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VNP1.logBase2(), MVT::i64)); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0), + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(VNP1.logBase2(), DL, MVT::i64)); + return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), ShiftedVal); } + // (mul x, -(2^N + 1)) => - (add (shl x, N), x) + APInt VNM1 = -Value - 1; + if (VNM1.isPowerOf2()) { + SDValue ShiftedVal = + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(VNM1.logBase2(), DL, MVT::i64)); + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, ShiftedVal, N->getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Add); + } } } return SDValue(); @@ -7037,7 +7161,7 @@ static SDValue tryCombineToEXTR(SDNode *N, } return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS, - DAG.getConstant(ShiftRHS, MVT::i64)); + DAG.getConstant(ShiftRHS, DL, MVT::i64)); } static SDValue tryCombineToBSL(SDNode *N, @@ -7165,10 +7289,10 @@ static SDValue performBitcastCombine(SDNode *N, SDLoc dl(N); unsigned NumElements = VT.getVectorNumElements(); if (idx) { - SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64); + SDValue HalfIdx = DAG.getConstant(NumElements, dl, MVT::i64); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx); } else { - SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, MVT::i32); + SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, dl, MVT::i32); return 
SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT, Source, SubReg), 0); @@ -7178,22 +7302,55 @@ static SDValue performBitcastCombine(SDNode *N, static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + + // Optimize concat_vectors of truncated vectors, where the intermediate + // type is illegal, to avoid said illegality, e.g., + // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))), + // (v2i16 (truncate (v2i64))))) + // -> + // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))), + // (v4i32 (bitcast (v2i64))), + // <0, 2, 4, 6>))) + // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed + // on both input and result type, so we might generate worse code. + // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8. + if (N->getNumOperands() == 2 && + N0->getOpcode() == ISD::TRUNCATE && + N1->getOpcode() == ISD::TRUNCATE) { + SDValue N00 = N0->getOperand(0); + SDValue N10 = N1->getOperand(0); + EVT N00VT = N00.getValueType(); + + if (N00VT == N10.getValueType() && + (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) && + N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) { + MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16); + SmallVector<int, 8> Mask(MidVT.getVectorNumElements()); + for (size_t i = 0; i < Mask.size(); ++i) + Mask[i] = i * 2; + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getVectorShuffle( + MidVT, dl, + DAG.getNode(ISD::BITCAST, dl, MidVT, N00), + DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask)); + } + } + // Wait 'til after everything is legalized to try this. That way we have // legal vector types and such. if (DCI.isBeforeLegalizeOps()) return SDValue(); - SDLoc dl(N); - EVT VT = N->getValueType(0); - // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector // splat. The indexed instructions are going to be expecting a DUPLANE64, so // canonicalise to that. - if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) { + if (N0 == N1 && VT.getVectorNumElements() == 2) { assert(VT.getVectorElementType().getSizeInBits() == 64); - return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, - WidenVector(N->getOperand(0), DAG), - DAG.getConstant(0, MVT::i64)); + return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG), + DAG.getConstant(0, dl, MVT::i64)); } // Canonicalise concat_vectors so that the right-hand vector has as few @@ -7205,10 +7362,9 @@ static SDValue performConcatVectorsCombine(SDNode *N, // becomes // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS)) - SDValue Op1 = N->getOperand(1); - if (Op1->getOpcode() != ISD::BITCAST) + if (N1->getOpcode() != ISD::BITCAST) return SDValue(); - SDValue RHS = Op1->getOperand(0); + SDValue RHS = N1->getOperand(0); MVT RHSTy = RHS.getValueType().getSimpleVT(); // If the RHS is not a vector, this is not the pattern we're looking for. 
if (!RHSTy.isVector()) @@ -7218,10 +7374,10 @@ static SDValue performConcatVectorsCombine(SDNode *N, MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(), RHSTy.getVectorNumElements() * 2); - return DAG.getNode( - ISD::BITCAST, dl, VT, - DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy, - DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS)); + return DAG.getNode(ISD::BITCAST, dl, VT, + DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy, + DAG.getNode(ISD::BITCAST, dl, RHSTy, N0), + RHS)); } static SDValue tryCombineFixedPointConvert(SDNode *N, @@ -7310,15 +7466,16 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { unsigned NumElems = NarrowTy.getVectorNumElements(); MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2); + SDLoc dl(N); SDValue NewDUP; if (IsDUPLANE) - NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0), + NewDUP = DAG.getNode(N.getOpcode(), dl, NewDUPVT, N.getOperand(0), N.getOperand(1)); else - NewDUP = DAG.getNode(AArch64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0)); + NewDUP = DAG.getNode(AArch64ISD::DUP, dl, NewDUPVT, N.getOperand(0)); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy, - NewDUP, DAG.getConstant(NumElems, MVT::i64)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy, NewDUP, + DAG.getConstant(NumElems, dl, MVT::i64)); } static bool isEssentiallyExtractSubvector(SDValue N) { @@ -7443,7 +7600,8 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) { SDLoc dl(Op); if (InfoAndKind.IsAArch64) { CCVal = DAG.getConstant( - AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), MVT::i32); + AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl, + MVT::i32); Cmp = *InfoAndKind.Info.AArch64.Cmp; } else Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0, @@ -7452,7 +7610,7 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) { CCVal, DAG, dl); EVT VT = Op->getValueType(0); - LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT)); + LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT)); return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp); } @@ -7592,12 +7750,15 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { break; } - if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) - return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1), - DAG.getConstant(-ShiftAmount, MVT::i32)); - else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) - return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1), - DAG.getConstant(ShiftAmount, MVT::i32)); + if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) { + SDLoc dl(N); + return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1), + DAG.getConstant(-ShiftAmount, dl, MVT::i32)); + } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) { + SDLoc dl(N); + return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1), + DAG.getConstant(ShiftAmount, dl, MVT::i32)); + } return SDValue(); } @@ -7618,6 +7779,16 @@ static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) { N->getOperand(0), N->getOperand(1), AndN.getOperand(0)); } +static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, + SelectionDAG &DAG) { + SDLoc dl(N); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), + DAG.getNode(Opc, dl, + N->getOperand(1).getSimpleValueType(), + N->getOperand(1)), + 
DAG.getConstant(0, dl, MVT::i64)); +} + static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -7630,6 +7801,18 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_neon_vcvtfxu2fp: return tryCombineFixedPointConvert(N, DCI, DAG); break; + case Intrinsic::aarch64_neon_saddv: + return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG); + case Intrinsic::aarch64_neon_uaddv: + return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG); + case Intrinsic::aarch64_neon_sminv: + return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG); + case Intrinsic::aarch64_neon_uminv: + return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG); + case Intrinsic::aarch64_neon_smaxv: + return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG); + case Intrinsic::aarch64_neon_umaxv: + return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG); case Intrinsic::aarch64_neon_fmax: return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); @@ -7744,9 +7927,9 @@ static SDValue performExtendCombine(SDNode *N, EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src, - DAG.getConstant(0, MVT::i64)); + DAG.getConstant(0, DL, MVT::i64)); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src, - DAG.getConstant(InNVT.getVectorNumElements(), MVT::i64)); + DAG.getConstant(InNVT.getVectorNumElements(), DL, MVT::i64)); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi); @@ -7806,7 +7989,7 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) { unsigned Offset = EltOffset; while (--NumVecElts) { SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, - DAG.getConstant(Offset, MVT::i64)); + DAG.getConstant(Offset, DL, MVT::i64)); NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), Alignment); @@ -7827,14 +8010,13 @@ static SDValue performSTORECombine(SDNode *N, return SDValue(); // Cyclone has bad performance on unaligned 16B stores when crossing line and - // page boundries. We want to split such stores. + // page boundaries. We want to split such stores. if (!Subtarget->isCyclone()) return SDValue(); // Don't split at Oz. 
MachineFunction &MF = DAG.getMachineFunction(); - bool IsMinSize = MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::MinSize); + bool IsMinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize); if (IsMinSize) return SDValue(); @@ -7868,15 +8050,15 @@ static SDValue performSTORECombine(SDNode *N, EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts); SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal, - DAG.getConstant(0, MVT::i64)); + DAG.getConstant(0, DL, MVT::i64)); SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal, - DAG.getConstant(NumElts, MVT::i64)); + DAG.getConstant(NumElts, DL, MVT::i64)); SDValue BasePtr = S->getBasePtr(); SDValue NewST1 = DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(), S->getAlignment()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, - DAG.getConstant(8, MVT::i64)); + DAG.getConstant(8, DL, MVT::i64)); return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr, S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(), S->getAlignment()); @@ -7944,6 +8126,13 @@ static SDValue performPostLD1Combine(SDNode *N, Inc = DAG.getRegister(AArch64::XZR, MVT::i64); } + // Finally, check that the vector doesn't depend on the load. + // Again, this would create a cycle. + // The load depending on the vector is fine, as that's the case for the + // LD1*post we'll eventually generate anyway. + if (LoadSDN->isPredecessorOf(Vector.getNode())) + continue; + SmallVector<SDValue, 8> Ops; Ops.push_back(LD->getOperand(0)); // Chain if (IsLaneOp) { @@ -7961,7 +8150,7 @@ static SDValue performPostLD1Combine(SDNode *N, LoadSDN->getMemOperand()); // Update the uses. - std::vector<SDValue> NewResults; + SmallVector<SDValue, 2> NewResults; NewResults.push_back(SDValue(LD, 0)); // The result of load NewResults.push_back(SDValue(UpdN.getNode(), 2)); // Chain DCI.CombineTo(LD, NewResults); @@ -8455,13 +8644,21 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) { /// the compare-mask instructions rather than going via NZCV, even if LHS and /// RHS are really scalar. This replaces any scalar setcc in the above pattern /// with a vector one followed by a DUP shuffle on the result. -static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue performSelectCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); EVT ResVT = N->getValueType(0); - if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1) + if (N0.getOpcode() != ISD::SETCC) return SDValue(); + // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered + // scalar SetCCResultType. We also don't expect vectors, because we assume + // that selects fed by vector SETCCs are canonicalized to VSELECT. + assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) && + "Scalar-SETCC feeding SELECT has unexpected result type!"); + // If NumMaskElts == 0, the comparison is larger than select result. The // largest real NEON comparison is 64-bits per lane, which means the result is // at most 32-bits and an illegal vector. Just bail out for now. 
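The compare-mask rewrite described above works because NEON comparisons produce all-ones/all-zeros lanes that can feed a bitwise select; a one-lane scalar model of that contract (plain C++, no LLVM types):

#include <cassert>
#include <cstdint>

// One lane of a NEON-style bitwise select: since the mask lane is all-ones
// or all-zeros, (t & mask) | (f & ~mask) yields exactly one of the inputs.
uint32_t bitwiseSelect(uint32_t mask, uint32_t t, uint32_t f) {
  return (t & mask) | (f & ~mask);
}

int main() {
  uint32_t mask = (3 < 5) ? 0xFFFFFFFFu : 0u; // scalar setcc widened to a mask lane
  assert(bitwiseSelect(mask, 7u, 9u) == 7u);
  assert(bitwiseSelect(~mask, 7u, 9u) == 9u);
  return 0;
}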
@@ -8479,6 +8676,16 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) { SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts); EVT CCVT = SrcVT.changeVectorElementTypeToInteger(); + // Also bail out if the vector CCVT isn't the same size as ResVT. + // This can happen if the SETCC operand size doesn't divide the ResVT size + // (e.g., f64 vs v3f32). + if (CCVT.getSizeInBits() != ResVT.getSizeInBits()) + return SDValue(); + + // Make sure we didn't create illegal types, if we're not supposed to. + assert(DCI.isBeforeLegalize() || + DAG.getTargetLoweringInfo().isTypeLegal(SrcVT)); + // First perform a vector comparison, where lane 0 is the one we're interested // in. SDLoc DL(N0); @@ -8497,6 +8704,75 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) { return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2)); } +/// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC +/// to match FMIN/FMAX patterns. +static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) { + // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y". + // Unless the NoNaNsFPMath option is set, be careful about NaNs: + // vmax/vmin return NaN if either operand is a NaN; + // only do the transformation when it matches that behavior. + + SDValue CondLHS = N->getOperand(0); + SDValue CondRHS = N->getOperand(1); + SDValue LHS = N->getOperand(2); + SDValue RHS = N->getOperand(3); + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); + + unsigned Opcode; + bool IsReversed; + if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) && + selectCCOpsAreFMaxCompatible(CondRHS, RHS)) { + IsReversed = false; // x CC y ? x : y + } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) && + selectCCOpsAreFMaxCompatible(CondLHS, RHS)) { + IsReversed = true ; // x CC y ? y : x + } else { + return SDValue(); + } + + bool IsUnordered = false, IsOrEqual; + switch (CC) { + default: + return SDValue(); + case ISD::SETULT: + case ISD::SETULE: + IsUnordered = true; + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETLT: + case ISD::SETLE: + IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE); + Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN; + break; + + case ISD::SETUGT: + case ISD::SETUGE: + IsUnordered = true; + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETGT: + case ISD::SETGE: + IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE); + Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX; + break; + } + + // If LHS is NaN, an ordered comparison will be false and the result will be + // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking + // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN. + if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) + return SDValue(); + + // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true, + // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be + // used for unsafe math or if one of the operands is known to be nonzero. 
+ if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) + return SDValue(); + + return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); +} + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -8526,9 +8802,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, case ISD::CONCAT_VECTORS: return performConcatVectorsCombine(N, DCI, DAG); case ISD::SELECT: - return performSelectCombine(N, DAG); + return performSelectCombine(N, DCI); case ISD::VSELECT: return performVSelectCombine(N, DCI.DAG); + case ISD::SELECT_CC: + return performSelectCCCombine(N, DCI.DAG); case ISD::STORE: return performSTORECombine(N, DCI, DAG, Subtarget); case AArch64ISD::BRCOND: @@ -8699,7 +8977,7 @@ static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results, Op = SDValue( DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32, DAG.getUNDEF(MVT::i32), Op, - DAG.getTargetConstant(AArch64::hsub, MVT::i32)), + DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)), 0); Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op)); @@ -8760,9 +9038,11 @@ bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { } // For the real atomic operations, we have ldxr/stxr up to 128 bits, -bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { +TargetLoweringBase::AtomicRMWExpansionKind +AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - return Size <= 128; + return Size <= 128 ? AtomicRMWExpansionKind::LLSC + : AtomicRMWExpansionKind::None; } bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const { @@ -8822,7 +9102,7 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo"); Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi"); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); - return Builder.CreateCall3(Stxr, Lo, Hi, Addr); + return Builder.CreateCall(Stxr, {Lo, Hi, Addr}); } Intrinsic::ID Int = @@ -8830,10 +9110,10 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, Type *Tys[] = { Addr->getType() }; Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys); - return Builder.CreateCall2( - Stxr, Builder.CreateZExtOrBitCast( - Val, Stxr->getFunctionType()->getParamType(0)), - Addr); + return Builder.CreateCall(Stxr, + {Builder.CreateZExtOrBitCast( + Val, Stxr->getFunctionType()->getParamType(0)), + Addr}); } bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters( diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 5a19322..0d9b8b7 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -18,13 +18,14 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Instruction.h" #include "llvm/Target/TargetLowering.h" namespace llvm { namespace AArch64ISD { -enum { +enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses. CALL, // Function call. @@ -140,6 +141,18 @@ enum { FCMLEz, FCMLTz, + // Vector across-lanes addition + // Only the lower result lane is defined. 
+ SADDV, + UADDV, + + // Vector across-lanes min/max + // Only the lower result lane is defined. + SMINV, + UMINV, + SMAXV, + UMAXV, + // Vector bitwise negation NOT, @@ -207,7 +220,8 @@ class AArch64TargetLowering : public TargetLowering { bool RequireStrictAlign; public: - explicit AArch64TargetLowering(const TargetMachine &TM); + explicit AArch64TargetLowering(const TargetMachine &TM, + const AArch64Subtarget &STI); /// Selects the correct CCAssignFn for a given CallingConvention value. CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; @@ -222,7 +236,7 @@ public: MVT getScalarShiftAmountTy(EVT LHSTy) const override; /// allowsMisalignedMemoryAccesses - Returns true if the target allows - /// unaligned memory accesses. of the specified type. + /// unaligned memory accesses of the specified type. bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0, unsigned Align = 1, bool *Fast = nullptr) const override { @@ -244,10 +258,6 @@ public: /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned getFunctionAlignment(const Function *F) const; - /// getMaximalGlobalOffset - Returns the maximal possible offset which can - /// be used for loads / stores from the global. - unsigned getMaximalGlobalOffset() const override; - /// Returns true if a cast between SrcAS and DestAS is a noop. bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Addrspacecasts are always noops. @@ -285,6 +295,8 @@ public: bool isTruncateFree(Type *Ty1, Type *Ty2) const override; bool isTruncateFree(EVT VT1, EVT VT2) const override; + bool isProfitableToHoist(Instruction *I) const override; + bool isZExtFree(Type *Ty1, Type *Ty2) const override; bool isZExtFree(EVT VT1, EVT VT2) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; @@ -335,13 +347,16 @@ public: bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicRMWExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; bool useLoadStackGuardNode() const override; TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; private: + bool isExtFreeImpl(const Instruction *Ext) const override; + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. 
const AArch64Subtarget *Subtarget; @@ -405,6 +420,9 @@ private: SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, + SDValue TVal, SDValue FVal, SDLoc dl, + SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; @@ -453,12 +471,23 @@ private: const char *constraint) const override; std::pair<unsigned, const TargetRegisterClass *> - getRegForInlineAsmConstraint(const std::string &Constraint, + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, MVT VT) const override; void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + if (ConstraintCode == "Q") + return InlineAsm::Constraint_Q; + // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are + // followed by llvm_unreachable so we'll leave them unimplemented in + // the backend for now. + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(CallInst *CI) const override; bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index d295c02..3b8b668 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -441,11 +441,11 @@ def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{ // instructions for splatting repeating bit patterns across the immediate. 
def logical_imm32_XFORM : SDNodeXForm<imm, [{ uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 32); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>; def logical_imm64_XFORM : SDNodeXForm<imm, [{ uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 64); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>; let DiagnosticType = "LogicalSecondSource" in { @@ -682,7 +682,7 @@ def fpimm32 : Operand<f32>, }], SDNodeXForm<fpimm, [{ APFloat InVal = N->getValueAPF(); uint32_t enc = AArch64_AM::getFP32Imm(InVal); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>> { let ParserMatchClass = FPImmOperand; let PrintMethod = "printFPImmOperand"; @@ -693,7 +693,7 @@ def fpimm64 : Operand<f64>, }], SDNodeXForm<fpimm, [{ APFloat InVal = N->getValueAPF(); uint32_t enc = AArch64_AM::getFP64Imm(InVal); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>> { let ParserMatchClass = FPImmOperand; let PrintMethod = "printFPImmOperand"; @@ -768,7 +768,7 @@ def simdimmtype10 : Operand<i32>, uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() .bitcastToAPInt() .getZExtValue()); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>> { let ParserMatchClass = SIMDImmType10Operand; let PrintMethod = "printSIMDType10Operand"; @@ -1637,10 +1637,16 @@ multiclass AddSub<bit isSub, string mnemonic, SDPatternOperator OpNode = null_frag> { let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in { // Add/Subtract immediate + // Increase the weight of the immediate variant to try to match it before + // the extended register variant. + // We used to match the register variant before the immediate when the + // register argument could be implicitly zero-extended. 
+ let AddedComplexity = 6 in def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32, mnemonic, OpNode> { let Inst{31} = 0; } + let AddedComplexity = 6 in def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64, mnemonic, OpNode> { let Inst{31} = 1; @@ -2186,7 +2192,8 @@ class BaseCondSelectOp<bit op, bits<2> op2, RegisterClass regtype, string asm, def inv_cond_XFORM : SDNodeXForm<imm, [{ AArch64CC::CondCode CC = static_cast<AArch64CC::CondCode>(N->getZExtValue()); - return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), MVT::i32); + return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), SDLoc(N), + MVT::i32); }]>; multiclass CondSelectOp<bit op, bits<2> op2, string asm, PatFrag frag> { @@ -3282,6 +3289,10 @@ class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0, : BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> { bits<5> Rt; bits<5> Rn; + let Inst{20-16} = 0b11111; + let Unpredictable{20-16} = 0b11111; + let Inst{14-10} = 0b11111; + let Unpredictable{14-10} = 0b11111; let Inst{9-5} = Rn; let Inst{4-0} = Rt; @@ -5298,6 +5309,27 @@ class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode, let Inst{4-0} = Rd; } +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode, + dag oops, dag iops, string asm, + list<dag> pattern> + : I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = R; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm, SDPatternOperator OpNode> { def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm, @@ -5325,6 +5357,16 @@ multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm, def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>; } +multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst), + (ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm), + asm, []>; + def v1i16: BaseSIMDThreeScalarTied<U, 0b01, R, opc, (outs FPR16:$dst), + (ins FPR16:$Rd, FPR16:$Rn, FPR16:$Rm), + asm, []>; +} + multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { @@ -5885,7 +5927,7 @@ multiclass SIMDIns { let Inst{20-18} = idx; let Inst{17-16} = 0b10; let Inst{14-12} = idx2; - let Inst{11} = 0; + let Inst{11} = {?}; } def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> { bits<2> idx; @@ -5893,7 +5935,7 @@ multiclass SIMDIns { let Inst{20-19} = idx; let Inst{18-16} = 0b100; let Inst{14-13} = idx2; - let Inst{12-11} = 0; + let Inst{12-11} = {?,?}; } def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> { bits<1> idx; @@ -5901,7 +5943,7 @@ multiclass SIMDIns { let Inst{20} = idx; let Inst{19-16} = 0b1000; let Inst{14} = idx2; - let Inst{13-11} = 0; + let Inst{13-11} = {?,?,?}; } // For all forms of the INS instruction, the "mov" mnemonic is the @@ -8517,6 +8559,174 @@ multiclass SIMDLdSt4SingleAliases<string asm> { } // end of 'let Predicates = [HasNEON]' //---------------------------------------------------------------------------- +// AdvSIMD v8.1 Rounding 
Double Multiply Add/Subtract +//---------------------------------------------------------------------------- + +let Predicates = [HasNEON, HasV8_1a] in { + +class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode, + RegisterOperand regtype, string asm, + string kind, list<dag> pattern> + : BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind, + pattern> { + let Inst{21}=0; +} +multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm, + SDPatternOperator Accum> { + def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn), + (v4i16 V64:$Rm)))))]>; + def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn), + (v8i16 V128:$Rm)))))]>; + def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn), + (v2i32 V64:$Rm)))))]>; + def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn), + (v4i32 V128:$Rm)))))]>; +} + +multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm, + SDPatternOperator Accum> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V64, V64, V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh + (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh + (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V64, V64, V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. 
+ // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we + // got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..))) + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (insert_subvector + (undef), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i32 0))), + (i64 0))))), + (EXTRACT_SUBREG + (v2i32 (!cast<Instruction>(NAME # v2i32_indexed) + (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V64:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i64 0))))), + (EXTRACT_SUBREG + (v4i32 (!cast<Instruction>(NAME # v4i32_indexed) + (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V128:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, + FPR16Op, FPR16Op, V128_lo, + VectorIndexH, asm, ".h", "", "", ".h", + []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i32 FPR32Op:$dst), + (Accum (i32 FPR32Op:$Rd), + (i32 (int_aarch64_neon_sqrdmulh + (i32 FPR32Op:$Rn), + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} +} // let Predicates = [HasNeon, HasV8_1a] + +//---------------------------------------------------------------------------- // Crypto extensions //---------------------------------------------------------------------------- diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index e582ed4..207c34c 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -31,7 +31,7 @@ using namespace llvm; AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), - RI(this, &STI), Subtarget(STI) {} + RI(STI.getTargetTriple()), Subtarget(STI) {} /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. 
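The SQRDMLAH/SQRDMLSH patterns above all wrap an accumulate around int_aarch64_neon_sqrdmulh; the following per-lane C++ sketch models what that matched pairing computes for 16-bit elements (a simplified illustration, not the architectural pseudocode):

#include <cassert>
#include <cstdint>

int16_t satToI16(int64_t v) {
  if (v > INT16_MAX) return INT16_MAX;
  if (v < INT16_MIN) return INT16_MIN;
  return static_cast<int16_t>(v);
}

// SQRDMULH on one lane: saturating doubling multiply, round, keep the
// high half of the Q15 product.
int16_t sqrdmulh16(int16_t a, int16_t b) {
  return satToI16((int64_t{a} * b * 2 + (1 << 15)) >> 16);
}

// The (Accum ... sqrdmulh ...) patterns pair that with a saturating
// accumulate (int_aarch64_neon_sqadd / _sqsub).
int16_t sqrdmlah16(int16_t acc, int16_t a, int16_t b) {
  return satToI16(int64_t{acc} + sqrdmulh16(a, b));
}

int main() {
  assert(sqrdmulh16(16384, 16384) == 8192); // 0.5 * 0.5 = 0.25 in Q15
  assert(sqrdmlah16(100, 16384, 16384) == 8292);
  return 0;
}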
@@ -617,10 +617,8 @@ AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, int OffsetA = 0, OffsetB = 0; int WidthA = 0, WidthB = 0; - assert(MIa && (MIa->mayLoad() || MIa->mayStore()) && - "MIa must be a store or a load"); - assert(MIb && (MIb->mayLoad() || MIb->mayStore()) && - "MIb must be a store or a load"); + assert(MIa && MIa->mayLoadOrStore() && "MIa must be a load or store."); + assert(MIb && MIb->mayLoadOrStore() && "MIb must be a load or store."); if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() || MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef()) @@ -707,9 +705,8 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) { assert(MBB && "Can't get MachineBasicBlock here"); MachineFunction *MF = MBB->getParent(); assert(MF && "Can't get MachineFunction here"); - const TargetMachine *TM = &MF->getTarget(); - const TargetInstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo(); - const TargetRegisterInfo *TRI = TM->getSubtargetImpl()->getRegisterInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); MachineRegisterInfo *MRI = &MF->getRegInfo(); for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx; @@ -1527,7 +1524,7 @@ void AArch64InstrInfo::copyPhysRegTuple( } for (; SubReg != End; SubReg += Incr) { - const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode)); + const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); @@ -1905,7 +1902,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } assert(Opc && "Unknown register class"); - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) + const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI); @@ -2003,7 +2000,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( } assert(Opc && "Unknown register class"); - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) + const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) .addReg(DestReg, getDefRegState(true)) .addFrameIndex(FI); if (Offset) @@ -2069,10 +2066,10 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, .setMIFlag(Flag); } -MachineInstr * -AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { +MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const { // This is a bit of a hack. 
Consider this instruction: // // %vreg0<def> = COPY %SP; GPR64all:%vreg0 @@ -2367,7 +2364,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(AArch64::HINT); - NopInst.addOperand(MCOperand::CreateImm(0)); + NopInst.addOperand(MCOperand::createImm(0)); } /// useMachineCombiner - return true when a target supports MachineCombiner bool AArch64InstrInfo::useMachineCombiner() const { diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index d8f1274..fa4b8b7 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -129,10 +129,9 @@ public: const TargetRegisterInfo *TRI) const override; using TargetInstrInfo::foldMemoryOperandImpl; - MachineInstr * - foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const override; bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index a6f09e9..c7d6a69 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -14,6 +14,8 @@ //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // +def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, + AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, @@ -258,6 +260,13 @@ def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>; def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>; +def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; +def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; +def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; +def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; +def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; +def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; + //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -489,7 +498,7 @@ def i64imm_32bit : ImmLeaf<i64, [{ }]>; def trunc_imm : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i32); + return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); }]>; def : Pat<(i64 i64imm_32bit:$src), @@ -498,12 +507,12 @@ def : Pat<(i64 i64imm_32bit:$src), // Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). 
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{ return CurDAG->getTargetConstant( - N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i32); + N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); }]>; def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{ return CurDAG->getTargetConstant( - N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i64); + N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); }]>; @@ -848,57 +857,57 @@ defm UBFM : BitfieldImm<0b10, "ubfm">; def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{ uint64_t enc = (32 - N->getZExtValue()) & 0x1f; - return CurDAG->getTargetConstant(enc, MVT::i64); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); }]>; def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{ uint64_t enc = 31 - N->getZExtValue(); - return CurDAG->getTargetConstant(enc, MVT::i64); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); }]>; // min(7, 31 - shift_amt) def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ uint64_t enc = 31 - N->getZExtValue(); enc = enc > 7 ? 7 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); }]>; // min(15, 31 - shift_amt) def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ uint64_t enc = 31 - N->getZExtValue(); enc = enc > 15 ? 15 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); }]>; def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{ uint64_t enc = (64 - N->getZExtValue()) & 0x3f; - return CurDAG->getTargetConstant(enc, MVT::i64); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); }]>; def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{ uint64_t enc = 63 - N->getZExtValue(); - return CurDAG->getTargetConstant(enc, MVT::i64); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); }]>; // min(7, 63 - shift_amt) def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ uint64_t enc = 63 - N->getZExtValue(); enc = enc > 7 ? 7 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); }]>; // min(15, 63 - shift_amt) def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ uint64_t enc = 63 - N->getZExtValue(); enc = enc > 15 ? 15 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); }]>; // min(31, 63 - shift_amt) def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{ uint64_t enc = 63 - N->getZExtValue(); enc = enc > 31 ? 
31 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); }]>; def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), @@ -2305,6 +2314,20 @@ def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; +let Predicates = [HasV8_1a] in { + // v8.1a "Limited Order Region" extension load-acquire instructions + def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; + def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; + def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; + def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; + + // v8.1a "Limited Order Region" extension store-release instructions + def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; + def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; + def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; + def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; +} + //===----------------------------------------------------------------------===// // Scaled floating point to integer conversion instructions. //===----------------------------------------------------------------------===// @@ -2341,8 +2364,15 @@ defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>; defm FMOV : UnscaledConversion<"fmov">; -def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>; -def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>; +// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable +let isReMaterializable = 1, isCodeGenOnly = 1 in { +def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, + PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>, + Requires<[NoZCZ]>; +def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, + PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>, + Requires<[NoZCZ]>; +} //===----------------------------------------------------------------------===// // Floating point conversion instruction. 
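The i32shift_a/i32shift_b XForms defined above compute the two UBFM immediates behind an immediate LSL; a quick self-contained check of that arithmetic (illustrative C++, not LLVM code):

#include <cassert>

int main() {
  // For "lsl w0, w1, #s", the UBFM alias uses immr = (32 - s) & 31
  // (i32shift_a) and imms = 31 - s (i32shift_b); UBFM prints back as LSL
  // exactly when imms != 31 and imms + 1 == immr.
  for (unsigned s = 1; s < 32; ++s) {
    unsigned immr = (32 - s) & 0x1f;
    unsigned imms = 31 - s;
    assert(imms != 31 && imms + 1 == immr);
  }
  return 0;
}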
@@ -2762,6 +2792,10 @@ defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>; defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; +defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", + int_aarch64_neon_sqsub>; defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", @@ -2775,6 +2809,55 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >; defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; +def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)), + (SMINv8i8 V64:$Rn, V64:$Rm)>; +def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)), + (SMINv4i16 V64:$Rn, V64:$Rm)>; +def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)), + (SMINv2i32 V64:$Rn, V64:$Rm)>; +def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)), + (SMINv16i8 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)), + (SMINv8i16 V128:$Rn, V128:$Rm)>; +def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)), + (SMINv4i32 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)), + (SMAXv8i8 V64:$Rn, V64:$Rm)>; +def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)), + (SMAXv4i16 V64:$Rn, V64:$Rm)>; +def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)), + (SMAXv2i32 V64:$Rn, V64:$Rm)>; +def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)), + (SMAXv16i8 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)), + (SMAXv8i16 V128:$Rn, V128:$Rm)>; +def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)), + (SMAXv4i32 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)), + (UMINv8i8 V64:$Rn, V64:$Rm)>; +def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)), + (UMINv4i16 V64:$Rn, V64:$Rm)>; +def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)), + (UMINv2i32 V64:$Rn, V64:$Rm)>; +def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)), + (UMINv16i8 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)), + (UMINv8i16 V128:$Rn, V128:$Rm)>; +def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)), + (UMINv4i32 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)), + (UMAXv8i8 V64:$Rn, V64:$Rm)>; +def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)), + (UMAXv4i16 V64:$Rn, V64:$Rm)>; +def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)), + (UMAXv2i32 V64:$Rn, V64:$Rm)>; +def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)), + (UMAXv16i8 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)), + (UMAXv8i16 V128:$Rn, V128:$Rm)>; +def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)), + (UMAXv4i32 V128:$Rn, V128:$Rm)>; + def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), @@ -2978,6 +3061,20 @@ defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl> defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; +let Predicates = [HasV8_1a] in { + defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; + defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; + def : Pat<(i32 (int_aarch64_neon_sqadd + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; + def : 
Pat<(i32 (int_aarch64_neon_sqsub + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; +} def : InstAlias<"cmls $dst, $src1, $src2", (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; @@ -3431,10 +3528,10 @@ defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">; defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">; defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">; defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">; -def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))), - (ADDPv2i64p V128:$Rn)>; -def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))), - (ADDPv2i64p V128:$Rn)>; +def : Pat<(v2i64 (AArch64saddv V128:$Rn)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; +def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), (FADDPv2i32p V64:$Rn)>; def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), @@ -3462,13 +3559,13 @@ def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))), // AdvSIMD INS/DUP instructions //---------------------------------------------------------------------------- -def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>; -def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>; -def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>; -def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>; -def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>; -def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>; -def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>; +def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>; +def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>; +def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>; +def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>; +def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>; +def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>; +def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>; def DUPv2i64lane : SIMDDup64FromElement; def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; @@ -3515,13 +3612,13 @@ def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), // instruction even if the types don't match: we just have to remap the lane // carefully. N.b. this trick only applies to truncations. 
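Concretely, the remap works because, in AArch64's little-endian lane numbering, the truncated low part of wide lane i is narrow lane N*i of the same register; the VecIndex_x2/x4/x8 transforms defined just below compute exactly that scaled index. A small C++ reference model of the arithmetic (a sketch of the semantics only, not LLVM code):

```cpp
#include <cstdint>
#include <cstring>

// The VecIndex_x2 transform in effect: the low 32 bits of 64-bit lane i
// are 32-bit lane 2*i once the register's bytes are reinterpreted.
uint32_t low32_of_lane(const uint64_t v[2], unsigned i) {
  uint32_t lanes[4];
  std::memcpy(lanes, v, sizeof lanes);  // reinterpret without aliasing UB
  return lanes[2 * i];                  // == static_cast<uint32_t>(v[i]) on LE
}
```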
def VecIndex_x2 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(2 * N->getZExtValue(), MVT::i64); + return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64); }]>; def VecIndex_x4 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(4 * N->getZExtValue(), MVT::i64); + return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64); }]>; def VecIndex_x8 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(8 * N->getZExtValue(), MVT::i64); + return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64); }]>; multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT, @@ -3724,36 +3821,24 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>; defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>; defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>; -defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>; -defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>; -defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>; -defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>; // Floating point vector extractions are codegen'd as either a sequence of -// subregister extractions, possibly fed by an INS if the lane number is -// anything other than zero. +// subregister extractions, or a MOV (aka CPY here, alias for DUP) if +// the lane number is anything other than zero. def : Pat<(vector_extract (v2f64 V128:$Rn), 0), (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; def : Pat<(vector_extract (v4f32 V128:$Rn), 0), (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; def : Pat<(vector_extract (v8f16 V128:$Rn), 0), (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>; + def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), - (f64 (EXTRACT_SUBREG - (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0, - V128:$Rn, VectorIndexD:$idx), - dsub))>; + (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>; def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), - (f32 (EXTRACT_SUBREG - (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0, - V128:$Rn, VectorIndexS:$idx), - ssub))>; + (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>; def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx), - (f16 (EXTRACT_SUBREG - (INSvi16lane (v8f16 (IMPLICIT_DEF)), 0, - V128:$Rn, VectorIndexH:$idx), - hsub))>; + (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>; // All concat_vectors operations are canonicalised to act on i64 vectors for // AArch64. In the general case we need an instruction, which had just as well be @@ -3799,121 +3884,143 @@ defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>; -multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> { -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it - def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)), - (i32 (SMOVvi8to32 +// Patterns for across-vector intrinsics, that have a node equivalent, that +// returns a vector (with only the low lane defined) instead of a scalar. +// In effect, opNode is the same as (scalar_to_vector (IntNode)). +multiclass SIMDAcrossLanesIntrinsic<string baseOpc, + SDPatternOperator opNode> { +// If a lane instruction caught the vector_extract around opNode, we can +// directly match the latter to the instruction. 
+def : Pat<(v8i8 (opNode V64:$Rn)), + (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>; +def : Pat<(v16i8 (opNode V128:$Rn)), (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - (i64 0)))>; - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - (i64 0)))>; -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - (i64 0)))>; + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>; +def : Pat<(v4i16 (opNode V64:$Rn)), + (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>; +def : Pat<(v8i16 (opNode V128:$Rn)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>; +def : Pat<(v4i32 (opNode V128:$Rn)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>; + + +// If none did, fallback to the explicit patterns, consuming the vector_extract. +def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)), + (i32 0)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), + bsub), ssub)>; +def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), + bsub), ssub)>; +def : Pat<(i32 (vector_extract (insert_subvector undef, + (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), + hsub), ssub)>; +def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), + hsub), ssub)>; +def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), + ssub), ssub)>; + +} + +multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, + SDPatternOperator opNode> + : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { // If there is a sign extension after this intrinsic, consume it as smov already // performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)), +def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, + (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)), + (i32 (SMOVvi8to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), + (i64 0)))>; +def : Pat<(i32 (sext_inreg (i32 (vector_extract + (opNode (v16i8 V128:$Rn)), (i64 0))), i8)), + (i32 (SMOVvi8to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), + (i64 0)))>; +def : Pat<(i32 (sext_inreg 
(i32 (vector_extract (insert_subvector undef, + (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)), (i32 (SMOVvi16to32 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), (i64 0)))>; -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), +def : Pat<(i32 (sext_inreg (i32 (vector_extract + (opNode (v8i16 V128:$Rn)), (i64 0))), i16)), (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - (i64 0)))>; -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; - -def : Pat<(i32 (intOp (v4i32 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), - ssub))>; + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), + (i64 0)))>; } -multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> { +multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, + SDPatternOperator opNode> + : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { // If there is a masking operation keeping only what has been actually // generated, consume it. - def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; -// If there is a masking operation keeping only what has been actually -// generated, consume it. -def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - ssub))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), +def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, + (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), + ssub))>; +def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))), + maski8_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), ssub))>; - -// If there is a masking operation keeping only what has been actually -// generated, consume it. 
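For orientation: the across-lanes instructions (ADDV, SMAXV, SMINV, UMAXV, UMINV, and friends) reduce every lane into lane 0 of a vector register, which is why the patterns above wrap the instruction in INSERT_SUBREG/EXTRACT_SUBREG and match a vector_extract of lane 0 rather than a bare scalar. A C++ reference model of the reduction semantics (a sketch only; the real lowering is the TableGen around it):

```cpp
#include <algorithm>
#include <cstdint>

// What an ADDV.4S reduction computes; the instruction leaves this in lane 0.
int32_t addv_4s(const int32_t v[4]) {
  return v[0] + v[1] + v[2] + v[3];
}

// What SMAXV.4S computes, likewise read back from lane 0.
int32_t smaxv_4s(const int32_t v[4]) {
  return std::max({v[0], v[1], v[2], v[3]});
}
```

The signed and unsigned multiclass variants then fold a trailing sext_inreg into the SMOV copy out of lane 0, or a trailing mask into the plain subregister copy, so no separate extend instruction is emitted.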
-def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), +def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, + (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), ssub))>; -// If there is a masking operation keeping only what has been actually -// generated, consume it. -def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), +def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))), + maski16_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), ssub))>; +} -def : Pat<(i32 (intOp (v4i32 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), - ssub))>; +defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>; +// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm +def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))), + (ADDPv2i32 V64:$Rn, V64:$Rn)>; -} +defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>; +// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm +def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))), + (ADDPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>; +def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))), + (SMAXPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>; +def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))), + (SMINPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>; +def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))), + (UMAXPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>; +def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))), + (UMINPv2i32 V64:$Rn, V64:$Rn)>; multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> { def : Pat<(i32 (intOp (v8i8 V64:$Rn))), @@ -3976,32 +4083,6 @@ def : Pat<(i64 (intOp (v4i32 V128:$Rn))), dsub))>; } -defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>; -def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>; -def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", 
int_aarch64_neon_umaxv>; -def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>; -def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; - defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; @@ -4324,6 +4405,10 @@ defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", int_aarch64_neon_sqsub>; +defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", + int_aarch64_neon_sqsub>; defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; @@ -5092,22 +5177,26 @@ def : Pat<(trap), (BRK 1)>; // Natural vector casts (64 bit) def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>; @@ -5122,22 +5211,26 @@ def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; // Natural vector casts (128 bit) def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 
FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 8157981..186e71a 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -16,6 +16,7 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -63,16 +64,24 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // If a matching instruction is found, MergeForward is set to true if the // merge is to remove the first instruction and replace the second with // a pair-wise insn, and false if the reverse is true. + // \p SExtIdx[out] gives the index of the result of the load pair that + // must be extended. The value of SExtIdx assumes that the paired load + // produces the value in this order: (I, returned iterator), i.e., + // -1 means no value has to be extended, 0 means I, and 1 means the + // returned iterator. MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, - bool &MergeForward, + bool &MergeForward, int &SExtIdx, unsigned Limit); // Merge the two instructions indicated into a single pair-wise instruction. // If MergeForward is true, erase the first instruction and fold its // operation into the second. If false, the reverse. Return the instruction // following the first instruction (which may change during processing). + // \p SExtIdx index of the result that must be extended for a paired load. + // -1 means none, 0 means I, and 1 means Paired. 
MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, bool MergeForward); + MachineBasicBlock::iterator Paired, bool MergeForward, + int SExtIdx); // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using @@ -135,6 +144,8 @@ static bool isUnscaledLdst(unsigned Opc) { return true; case AArch64::LDURXi: return true; + case AArch64::LDURSWi: + return true; } } @@ -173,6 +184,46 @@ int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { case AArch64::LDRXui: case AArch64::LDURXi: return 8; + case AArch64::LDRSWui: + case AArch64::LDURSWi: + return 4; + } +} + +static unsigned getMatchingNonSExtOpcode(unsigned Opc, + bool *IsValidLdStrOpc = nullptr) { + if (IsValidLdStrOpc) + *IsValidLdStrOpc = true; + switch (Opc) { + default: + if (IsValidLdStrOpc) + *IsValidLdStrOpc = false; + return UINT_MAX; + case AArch64::STRDui: + case AArch64::STURDi: + case AArch64::STRQui: + case AArch64::STURQi: + case AArch64::STRWui: + case AArch64::STURWi: + case AArch64::STRXui: + case AArch64::STURXi: + case AArch64::LDRDui: + case AArch64::LDURDi: + case AArch64::LDRQui: + case AArch64::LDURQi: + case AArch64::LDRWui: + case AArch64::LDURWi: + case AArch64::LDRXui: + case AArch64::LDURXi: + case AArch64::STRSui: + case AArch64::STURSi: + case AArch64::LDRSui: + case AArch64::LDURSi: + return Opc; + case AArch64::LDRSWui: + return AArch64::LDRWui; + case AArch64::LDURSWi: + return AArch64::LDURWi; } } @@ -210,6 +261,9 @@ static unsigned getMatchingPairOpcode(unsigned Opc) { case AArch64::LDRXui: case AArch64::LDURXi: return AArch64::LDPXi; + case AArch64::LDRSWui: + case AArch64::LDURSWi: + return AArch64::LDPSWi; } } @@ -237,6 +291,8 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { return AArch64::LDRWpre; case AArch64::LDRXui: return AArch64::LDRXpre; + case AArch64::LDRSWui: + return AArch64::LDRSWpre; } } @@ -264,13 +320,15 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { return AArch64::LDRWpost; case AArch64::LDRXui: return AArch64::LDRXpost; + case AArch64::LDRSWui: + return AArch64::LDRSWpost; } } MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, - bool MergeForward) { + bool MergeForward, int SExtIdx) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need @@ -280,11 +338,13 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, if (NextI == Paired) ++NextI; - bool IsUnscaled = isUnscaledLdst(I->getOpcode()); + unsigned Opc = + SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); + bool IsUnscaled = isUnscaledLdst(Opc); int OffsetStride = IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1; - unsigned NewOpc = getMatchingPairOpcode(I->getOpcode()); + unsigned NewOpc = getMatchingPairOpcode(Opc); // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; @@ -299,6 +359,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, Paired->getOperand(2).getImm() + OffsetStride) { RtMI = Paired; Rt2MI = I; + // Here we swapped the assumption made for SExtIdx. + // I.e., we turn ldp I, Paired into ldp Paired, I. + // Update the index accordingly. 
+ if (SExtIdx != -1) + SExtIdx = (SExtIdx + 1) % 2; } else { RtMI = I; Rt2MI = Paired; @@ -325,8 +390,47 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, DEBUG(dbgs() << " "); DEBUG(Paired->print(dbgs())); DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); + + if (SExtIdx != -1) { + // Generate the sign extension for the proper result of the ldp. + // I.e., with X1, that would be: + // %W1<def> = KILL %W1, %X1<imp-def> + // %X1<def> = SBFMXri %X1<kill>, 0, 31 + MachineOperand &DstMO = MIB->getOperand(SExtIdx); + // Right now, DstMO has the extended register, since it comes from an + // extended opcode. + unsigned DstRegX = DstMO.getReg(); + // Get the W variant of that register. + unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); + // Update the result of LDP to use the W instead of the X variant. + DstMO.setReg(DstRegW); + DEBUG(((MachineInstr *)MIB)->print(dbgs())); + DEBUG(dbgs() << "\n"); + // Make the machine verifier happy by providing a definition for + // the X register. + // Insert this definition right after the generated LDP, i.e., before + // InsertionPoint. + MachineInstrBuilder MIBKill = + BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(TargetOpcode::KILL), DstRegW) + .addReg(DstRegW) + .addReg(DstRegX, RegState::Define); + MIBKill->getOperand(2).setImplicit(); + // Create the sign extension. + MachineInstrBuilder MIBSXTW = + BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(AArch64::SBFMXri), DstRegX) + .addReg(DstRegX) + .addImm(0) + .addImm(31); + (void)MIBSXTW; + DEBUG(dbgs() << " Extend operand:\n "); + DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); + DEBUG(dbgs() << "\n"); + } else { + DEBUG(((MachineInstr *)MIB)->print(dbgs())); + DEBUG(dbgs() << "\n"); + } // Erase the old instructions. I->eraseFromParent(); @@ -380,17 +484,41 @@ static int alignTo(int Num, int PowOf2) { return (Num + PowOf2 - 1) & ~(PowOf2 - 1); } +static bool mayAlias(MachineInstr *MIa, MachineInstr *MIb, + const AArch64InstrInfo *TII) { + // One of the instructions must modify memory. + if (!MIa->mayStore() && !MIb->mayStore()) + return false; + + // Both instructions must be memory operations. + if (!MIa->mayLoadOrStore() && !MIb->mayLoadOrStore()) + return false; + + return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb); +} + +static bool mayAlias(MachineInstr *MIa, + SmallVectorImpl<MachineInstr *> &MemInsns, + const AArch64InstrInfo *TII) { + for (auto &MIb : MemInsns) + if (mayAlias(MIa, MIb, TII)) + return true; + + return false; +} + /// findMatchingInsn - Scan the instructions looking for a load/store that can /// be combined with the current instruction into a load/store pair. 
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, - bool &MergeForward, unsigned Limit) { + bool &MergeForward, int &SExtIdx, + unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr *FirstMI = I; ++MBBI; - int Opc = FirstMI->getOpcode(); + unsigned Opc = FirstMI->getOpcode(); bool MayLoad = FirstMI->mayLoad(); bool IsUnscaled = isUnscaledLdst(Opc); unsigned Reg = FirstMI->getOperand(0).getReg(); @@ -414,6 +542,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, BitVector ModifiedRegs, UsedRegs; ModifiedRegs.resize(TRI->getNumRegs()); UsedRegs.resize(TRI->getNumRegs()); + + // Remember any instructions that read/write memory between FirstMI and MI. + SmallVector<MachineInstr *, 4> MemInsns; + for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { MachineInstr *MI = MBBI; // Skip DBG_VALUE instructions. Otherwise debug info can affect the @@ -424,7 +556,19 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // Now that we know this is a real instruction, count it. ++Count; - if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) { + bool CanMergeOpc = Opc == MI->getOpcode(); + SExtIdx = -1; + if (!CanMergeOpc) { + bool IsValidLdStrOpc; + unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc); + if (!IsValidLdStrOpc) + continue; + // Opc will be the first instruction in the pair. + SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0; + CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode()); + } + + if (CanMergeOpc && MI->getOperand(2).isImm()) { // If we've found another instruction with the same opcode, check to see // if the base and offset are compatible with our starting instruction. // These instructions all have scaled immediate operands, so we just @@ -450,6 +594,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode()); if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + if (MI->mayLoadOrStore()) + MemInsns.push_back(MI); continue; } // If the alignment requirements of the paired (scaled) instruction @@ -458,6 +604,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, if (IsUnscaled && EnableAArch64UnscaledMemOp && (alignTo(MinOffset, OffsetStride) != MinOffset)) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + if (MI->mayLoadOrStore()) + MemInsns.push_back(MI); continue; } // If the destination register of the loads is the same register, bail @@ -465,22 +613,29 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // registers the same is UNPREDICTABLE and will result in an exception. if (MayLoad && Reg == MI->getOperand(0).getReg()) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + if (MI->mayLoadOrStore()) + MemInsns.push_back(MI); continue; } // If the Rt of the second instruction was not modified or used between - // the two instructions, we can combine the second into the first. + // the two instructions and none of the instructions between the second + // and first alias with the second, we can combine the second into the + // first. 
if (!ModifiedRegs[MI->getOperand(0).getReg()] && - !UsedRegs[MI->getOperand(0).getReg()]) { + !UsedRegs[MI->getOperand(0).getReg()] && + !mayAlias(MI, MemInsns, TII)) { MergeForward = false; return MBBI; } // Likewise, if the Rt of the first instruction is not modified or used - // between the two instructions, we can combine the first into the - // second. + // between the two instructions and none of the instructions between the + // first and the second alias with the first, we can combine the first + // into the second. if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] && - !UsedRegs[FirstMI->getOperand(0).getReg()]) { + !UsedRegs[FirstMI->getOperand(0).getReg()] && + !mayAlias(FirstMI, MemInsns, TII)) { MergeForward = true; return MBBI; } @@ -489,21 +644,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, } } - // If the instruction wasn't a matching load or store, but does (or can) - // modify memory, stop searching, as we don't have alias analysis or - // anything like that to tell us whether the access is tromping on the - // locations we care about. The big one we want to catch is calls. - // - // FIXME: Theoretically, we can do better than that for SP and FP based - // references since we can effectively know where those are touching. It's - // unclear if it's worth the extra code, though. Most paired instructions - // will be sequential, perhaps with a few intervening non-memory related - // instructions. - if (MI->mayStore() || MI->isCall()) - return E; - // Likewise, if we're matching a store instruction, we don't want to - // move across a load, as it may be reading the same location. - if (FirstMI->mayStore() && MI->mayLoad()) + // If the instruction wasn't a matching load or store. Stop searching if we + // encounter a call instruction that might modify memory. + if (MI->isCall()) return E; // Update modified / uses register lists. @@ -513,6 +656,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // return early. if (ModifiedRegs[BaseReg]) return E; + + // Update list of instructions that read/write memory. + if (MI->mayLoadOrStore()) + MemInsns.push_back(MI); } return E; } @@ -780,6 +927,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { case AArch64::LDRQui: case AArch64::LDRXui: case AArch64::LDRWui: + case AArch64::LDRSWui: // do the unscaled versions as well case AArch64::STURSi: case AArch64::STURDi: @@ -790,7 +938,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { case AArch64::LDURDi: case AArch64::LDURQi: case AArch64::LDURWi: - case AArch64::LDURXi: { + case AArch64::LDURXi: + case AArch64::LDURSWi: { // If this is a volatile load/store, don't mess with it. if (MI->hasOrderedMemoryRef()) { ++MBBI; @@ -809,13 +958,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { } // Look ahead up to ScanLimit instructions for a pairable instruction. bool MergeForward = false; + int SExtIdx = -1; MachineBasicBlock::iterator Paired = - findMatchingInsn(MBBI, MergeForward, ScanLimit); + findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit); if (Paired != E) { // Merge the loads into a pair. Keeping the iterator straight is a // pain, so we let the merge routine tell us what the next instruction // is after it's done mucking about. 
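Taken together, the load/store-optimizer changes in this hunk do two things: LDRSWui/LDURSWi become pairable (two of them merge into LDPSWi; one paired with a plain 32-bit load becomes an LDP plus a single SBFMXri sign extension, steered by SExtIdx), and the pair search no longer aborts at the first intervening memory operation — intervening loads and stores are collected in MemInsns and checked with areMemAccessesTriviallyDisjoint, while a call (or a clobber of the base register) still ends the scan. A hedged C++ sketch of code this can now handle (whether it actually pairs depends on ScanLimit and instruction order):

```cpp
#include <cstdint>

// Two sign-extending 32-bit loads at adjacent offsets with a store between
// them that is trivially disjoint (same base register, different scaled
// offset). Previously the store ended the search; now the loads can still
// be merged into a single LDPSW.
int64_t sum_adjacent(int32_t *p) {
  int64_t a = p[0];  // ldrsw
  p[4] = 7;          // str at offset 16: disjoint from p[0] and p[1]
  int64_t b = p[1];  // ldrsw, pairable with the first load
  return a + b;
}
```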
- MBBI = mergePairedInsns(MBBI, Paired, MergeForward); + MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx); Modified = true; ++NumPairCreated; @@ -835,7 +985,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { MachineInstr *MI = MBBI; // Do update merging. It's simpler to keep this separate from the above // switch, though not strictly necessary. - int Opc = MI->getOpcode(); + unsigned Opc = MI->getOpcode(); switch (Opc) { default: // Just move on to the next instruction. @@ -931,10 +1081,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { } bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - const TargetMachine &TM = Fn.getTarget(); - TII = static_cast<const AArch64InstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); - TRI = TM.getSubtargetImpl()->getRegisterInfo(); + TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo()); + TRI = Fn.getSubtarget().getRegisterInfo(); bool Modified = false; for (auto &MBB : Fn) diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index b829341..72edbf1 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -73,7 +73,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::CreateAdd( Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); - return MCOperand::CreateExpr(Expr); + return MCOperand::createExpr(Expr); } MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, @@ -148,7 +148,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, RefKind = static_cast<AArch64MCExpr::VariantKind>(RefFlags); Expr = AArch64MCExpr::Create(Expr, RefKind, Ctx); - return MCOperand::CreateExpr(Expr); + return MCOperand::createExpr(Expr); } MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, @@ -169,16 +169,16 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, // Ignore all implicit register operands. if (MO.isImplicit()) return false; - MCOp = MCOperand::CreateReg(MO.getReg()); + MCOp = MCOperand::createReg(MO.getReg()); break; case MachineOperand::MO_RegisterMask: // Regmasks are like implicit defs. 
return false; case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); + MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::CreateExpr( + MCOp = MCOperand::createExpr( MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx)); break; case MachineOperand::MO_GlobalAddress: diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index f942c4e..5394875 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -235,7 +235,7 @@ bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, costs[i + 1][j + 1] = sameParityMax + 1.0; } } - G.setEdgeCosts(edge, std::move(costs)); + G.updateEdgeCosts(edge, std::move(costs)); return true; } @@ -312,14 +312,14 @@ void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, costs[i + 1][j + 1] = sameParityMax + 1.0; } } - G.setEdgeCosts(edge, std::move(costs)); + G.updateEdgeCosts(edge, std::move(costs)); } } } static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg, const MachineInstr &MI) { - LiveInterval LI = LIs.getInterval(reg); + const LiveInterval &LI = LIs.getInterval(reg); SlotIndex SI = LIs.getInstructionIndex(&MI); return LI.expiredAt(SI); } @@ -328,7 +328,7 @@ void A57ChainingConstraint::apply(PBQPRAGraph &G) { const MachineFunction &MF = G.getMetadata().MF; LiveIntervals &LIs = G.getMetadata().LIS; - TRI = MF.getTarget().getSubtargetImpl()->getRegisterInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); DEBUG(MF.dump()); for (const auto &MBB: MF) { diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp index 97b0f0e..e1b93bf 100644 --- a/lib/Target/AArch64/AArch64PromoteConstant.cpp +++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp @@ -22,7 +22,7 @@ #include "AArch64.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" @@ -31,12 +31,14 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -112,44 +114,42 @@ private: AU.addPreserved<DominatorTreeWrapperPass>(); } - /// Type to store a list of User. - typedef SmallVector<Value::user_iterator, 4> Users; + /// Type to store a list of Uses. + typedef SmallVector<Use *, 4> Uses; /// Map an insertion point to all the uses it dominates. - typedef DenseMap<Instruction *, Users> InsertionPoints; + typedef DenseMap<Instruction *, Uses> InsertionPoints; /// Map a function to the required insertion point of load for a /// global variable. typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc; /// Find the closest point that dominates the given Use. - Instruction *findInsertionPoint(Value::user_iterator &Use); + Instruction *findInsertionPoint(Use &Use); /// Check if the given insertion point is dominated by an existing /// insertion point. /// If true, the given use is added to the list of dominated uses for /// the related existing point. 
/// \param NewPt the insertion point to be checked - /// \param UseIt the use to be added into the list of dominated uses + /// \param Use the use to be added into the list of dominated uses /// \param InsertPts existing insertion points /// \pre NewPt and all instruction in InsertPts belong to the same function /// \return true if one of the insertion point in InsertPts dominates NewPt, /// false otherwise - bool isDominated(Instruction *NewPt, Value::user_iterator &UseIt, - InsertionPoints &InsertPts); + bool isDominated(Instruction *NewPt, Use &Use, InsertionPoints &InsertPts); /// Check if the given insertion point can be merged with an existing /// insertion point in a common dominator. /// If true, the given use is added to the list of the created insertion /// point. /// \param NewPt the insertion point to be checked - /// \param UseIt the use to be added into the list of dominated uses + /// \param Use the use to be added into the list of dominated uses /// \param InsertPts existing insertion points /// \pre NewPt and all instruction in InsertPts belong to the same function /// \pre isDominated returns false for the exact same parameters. /// \return true if it exists an insertion point in InsertPts that could /// have been merged with NewPt in a common dominator, /// false otherwise - bool tryAndMerge(Instruction *NewPt, Value::user_iterator &UseIt, - InsertionPoints &InsertPts); + bool tryAndMerge(Instruction *NewPt, Use &Use, InsertionPoints &InsertPts); /// Compute the minimal insertion points to dominates all the interesting /// uses of value. @@ -182,21 +182,21 @@ private: bool promoteConstant(Constant *Cst); /// Transfer the list of dominated uses of IPI to NewPt in InsertPts. - /// Append UseIt to this list and delete the entry of IPI in InsertPts. - static void appendAndTransferDominatedUses(Instruction *NewPt, - Value::user_iterator &UseIt, + /// Append Use to this list and delete the entry of IPI in InsertPts. + static void appendAndTransferDominatedUses(Instruction *NewPt, Use &Use, InsertionPoints::iterator &IPI, InsertionPoints &InsertPts) { // Record the dominated use. - IPI->second.push_back(UseIt); + IPI->second.push_back(&Use); // Transfer the dominated uses of IPI to NewPt // Inserting into the DenseMap may invalidate existing iterator. - // Keep a copy of the key to find the iterator to erase. + // Keep a copy of the key to find the iterator to erase. Keep a copy of the + // value so that we don't have to dereference IPI->second. Instruction *OldInstr = IPI->first; - InsertPts[NewPt] = std::move(IPI->second); + Uses OldUses = std::move(IPI->second); + InsertPts[NewPt] = std::move(OldUses); // Erase IPI. - IPI = InsertPts.find(OldInstr); - InsertPts.erase(IPI); + InsertPts.erase(OldInstr); } }; } // end anonymous namespace @@ -328,23 +328,18 @@ static bool shouldConvert(const Constant *Cst) { return isConstantUsingVectorTy(Cst->getType()); } -Instruction * -AArch64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) { +Instruction *AArch64PromoteConstant::findInsertionPoint(Use &Use) { + Instruction *User = cast<Instruction>(Use.getUser()); + // If this user is a phi, the insertion point is in the related // incoming basic block. 
- PHINode *PhiInst = dyn_cast<PHINode>(*Use); - Instruction *InsertionPoint; - if (PhiInst) - InsertionPoint = - PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator(); - else - InsertionPoint = dyn_cast<Instruction>(*Use); - assert(InsertionPoint && "User is not an instruction!"); - return InsertionPoint; + if (PHINode *PhiInst = dyn_cast<PHINode>(User)) + return PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator(); + + return User; } -bool AArch64PromoteConstant::isDominated(Instruction *NewPt, - Value::user_iterator &UseIt, +bool AArch64PromoteConstant::isDominated(Instruction *NewPt, Use &Use, InsertionPoints &InsertPts) { DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( @@ -363,15 +358,14 @@ bool AArch64PromoteConstant::isDominated(Instruction *NewPt, DEBUG(dbgs() << "Insertion point dominated by:\n"); DEBUG(IPI.first->print(dbgs())); DEBUG(dbgs() << '\n'); - IPI.second.push_back(UseIt); + IPI.second.push_back(&Use); return true; } } return false; } -bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, - Value::user_iterator &UseIt, +bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, Use &Use, InsertionPoints &InsertPts) { DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( *NewPt->getParent()->getParent()).getDomTree(); @@ -391,7 +385,7 @@ bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, DEBUG(dbgs() << "Merge insertion point with:\n"); DEBUG(IPI->first->print(dbgs())); DEBUG(dbgs() << "\nat considered insertion point.\n"); - appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts); + appendAndTransferDominatedUses(NewPt, Use, IPI, InsertPts); return true; } @@ -415,7 +409,7 @@ bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, DEBUG(dbgs() << '\n'); DEBUG(NewPt->print(dbgs())); DEBUG(dbgs() << '\n'); - appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts); + appendAndTransferDominatedUses(NewPt, Use, IPI, InsertPts); return true; } return false; @@ -424,22 +418,22 @@ bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, void AArch64PromoteConstant::computeInsertionPoints( Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) { DEBUG(dbgs() << "** Compute insertion points **\n"); - for (Value::user_iterator UseIt = Val->user_begin(), - EndUseIt = Val->user_end(); - UseIt != EndUseIt; ++UseIt) { + for (Use &Use : Val->uses()) { + Instruction *User = dyn_cast<Instruction>(Use.getUser()); + // If the user is not an Instruction, we cannot modify it. - if (!isa<Instruction>(*UseIt)) + if (!User) continue; // Filter out uses that should not be converted. - if (!shouldConvertUse(Val, cast<Instruction>(*UseIt), UseIt.getOperandNo())) + if (!shouldConvertUse(Val, User, Use.getOperandNo())) continue; - DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n"); - DEBUG((*UseIt)->print(dbgs())); + DEBUG(dbgs() << "Considered use, opidx " << Use.getOperandNo() << ":\n"); + DEBUG(User->print(dbgs())); DEBUG(dbgs() << '\n'); - Instruction *InsertionPoint = findInsertionPoint(UseIt); + Instruction *InsertionPoint = findInsertionPoint(Use); DEBUG(dbgs() << "Considered insertion point:\n"); DEBUG(InsertionPoint->print(dbgs())); @@ -449,17 +443,17 @@ void AArch64PromoteConstant::computeInsertionPoints( // by another one. 
InsertionPoints &InsertPts = InsPtsPerFunc[InsertionPoint->getParent()->getParent()]; - if (isDominated(InsertionPoint, UseIt, InsertPts)) + if (isDominated(InsertionPoint, Use, InsertPts)) continue; // This insertion point is useful, check if we can merge some insertion // point in a common dominator or if NewPt dominates an existing one. - if (tryAndMerge(InsertionPoint, UseIt, InsertPts)) + if (tryAndMerge(InsertionPoint, Use, InsertPts)) continue; DEBUG(dbgs() << "Keep considered insertion point\n"); // It is definitely useful by its own - InsertPts[InsertionPoint].push_back(UseIt); + InsertPts[InsertionPoint].push_back(&Use); } } @@ -470,41 +464,32 @@ bool AArch64PromoteConstant::insertDefinitions( bool HasChanged = false; // Traverse all insertion points in all the function. - for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(), - EndIt = InsPtsPerFunc.end(); - FctToInstPtsIt != EndIt; ++FctToInstPtsIt) { - InsertionPoints &InsertPts = FctToInstPtsIt->second; + for (const auto &FctToInstPtsIt : InsPtsPerFunc) { + const InsertionPoints &InsertPts = FctToInstPtsIt.second; // Do more checking for debug purposes. #ifndef NDEBUG DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( - *FctToInstPtsIt->first).getDomTree(); + *FctToInstPtsIt.first).getDomTree(); #endif - GlobalVariable *PromotedGV; assert(!InsertPts.empty() && "Empty uses does not need a definition"); - Module *M = FctToInstPtsIt->first->getParent(); - DenseMap<Module *, GlobalVariable *>::iterator MapIt = - ModuleToMergedGV.find(M); - if (MapIt == ModuleToMergedGV.end()) { + Module *M = FctToInstPtsIt.first->getParent(); + GlobalVariable *&PromotedGV = ModuleToMergedGV[M]; + if (!PromotedGV) { PromotedGV = new GlobalVariable( *M, Cst->getType(), true, GlobalValue::InternalLinkage, nullptr, "_PromotedConst", nullptr, GlobalVariable::NotThreadLocal); PromotedGV->setInitializer(Cst); - ModuleToMergedGV[M] = PromotedGV; DEBUG(dbgs() << "Global replacement: "); DEBUG(PromotedGV->print(dbgs())); DEBUG(dbgs() << '\n'); ++NumPromoted; HasChanged = true; - } else { - PromotedGV = MapIt->second; } - for (InsertionPoints::iterator IPI = InsertPts.begin(), - EndIPI = InsertPts.end(); - IPI != EndIPI; ++IPI) { + for (const auto &IPI : InsertPts) { // Create the load of the global variable. - IRBuilder<> Builder(IPI->first->getParent(), IPI->first); + IRBuilder<> Builder(IPI.first->getParent(), IPI.first); LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV); DEBUG(dbgs() << "**********\n"); DEBUG(dbgs() << "New def: "); @@ -512,18 +497,15 @@ bool AArch64PromoteConstant::insertDefinitions( DEBUG(dbgs() << '\n'); // Update the dominated uses. - Users &DominatedUsers = IPI->second; - for (Value::user_iterator Use : DominatedUsers) { + for (Use *Use : IPI.second) { #ifndef NDEBUG - assert((DT.dominates(LoadedCst, cast<Instruction>(*Use)) || - (isa<PHINode>(*Use) && - DT.dominates(LoadedCst, findInsertionPoint(Use)))) && + assert(DT.dominates(LoadedCst, findInsertionPoint(*Use)) && "Inserted definition does not dominate all its uses!"); #endif - DEBUG(dbgs() << "Use to update " << Use.getOperandNo() << ":"); - DEBUG(Use->print(dbgs())); + DEBUG(dbgs() << "Use to update " << Use->getOperandNo() << ":"); + DEBUG(Use->getUser()->print(dbgs())); DEBUG(dbgs() << '\n'); - Use->setOperand(Use.getOperandNo(), LoadedCst); + Use->set(LoadedCst); ++NumPromotedUses; } } @@ -556,22 +538,19 @@ bool AArch64PromoteConstant::runOnFunction(Function &F) { // global variable. 
Create as few loads of this variable as possible and // update the uses accordingly. bool LocalChange = false; - SmallSet<Constant *, 8> AlreadyChecked; - - for (auto &MBB : F) { - for (auto &MI : MBB) { - // Traverse the operand, looking for constant vectors. Replace them by a - // load of a global variable of constant vector type. - for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands(); - OpIdx != EndOpIdx; ++OpIdx) { - Constant *Cst = dyn_cast<Constant>(MI.getOperand(OpIdx)); - // There is no point in promoting global values as they are already - // global. Do not promote constant expressions either, as they may - // require some code expansion. - if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) && - AlreadyChecked.insert(Cst).second) - LocalChange |= promoteConstant(Cst); - } + SmallPtrSet<Constant *, 8> AlreadyChecked; + + for (Instruction &I : inst_range(&F)) { + // Traverse the operand, looking for constant vectors. Replace them by a + // load of a global variable of constant vector type. + for (Value *Op : I.operand_values()) { + Constant *Cst = dyn_cast<Constant>(Op); + // There is no point in promoting global values as they are already + // global. Do not promote constant expressions either, as they may + // require some code expansion. + if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) && + AlreadyChecked.insert(Cst).second) + LocalChange |= promoteConstant(Cst); } } return LocalChange; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 206cdbb..1836682 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -18,6 +18,7 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -37,9 +38,8 @@ static cl::opt<bool> ReserveX18("aarch64-reserve-x18", cl::Hidden, cl::desc("Reserve X18, making it unavailable as GPR")); -AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii, - const AArch64Subtarget *sti) - : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {} +AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT) + : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {} const MCPhysReg * AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { @@ -55,7 +55,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } const uint32_t * -AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return CSR_AArch64_NoRegs_RegMask; @@ -66,15 +67,16 @@ AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { } const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { - if (STI->isTargetDarwin()) + if (TT.isOSDarwin()) return CSR_AArch64_TLS_Darwin_RegMask; - assert(STI->isTargetELF() && "only expect Darwin or ELF TLS"); + assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS"); return CSR_AArch64_TLS_ELF_RegMask; } const uint32_t * -AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { +AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { // This should return a 
register mask that is the same as that returned by // getCallPreservedMask but that additionally preserves the register used for // the first i64 argument (which must also be the register used to return a @@ -97,12 +99,12 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AArch64::WSP); Reserved.set(AArch64::WZR); - if (TFI->hasFP(MF) || STI->isTargetDarwin()) { + if (TFI->hasFP(MF) || TT.isOSDarwin()) { Reserved.set(AArch64::FP); Reserved.set(AArch64::W29); } - if (STI->isTargetDarwin() || ReserveX18) { + if (TT.isOSDarwin() || ReserveX18) { Reserved.set(AArch64::X18); // Platform register Reserved.set(AArch64::W18); } @@ -129,10 +131,10 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF, return true; case AArch64::X18: case AArch64::W18: - return STI->isTargetDarwin() || ReserveX18; + return TT.isOSDarwin() || ReserveX18; case AArch64::FP: case AArch64::W29: - return TFI->hasFP(MF) || STI->isTargetDarwin(); + return TFI->hasFP(MF) || TT.isOSDarwin(); case AArch64::W19: case AArch64::X19: return hasBasePointer(MF); @@ -163,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { // large enough that referencing from the FP won't result in things being // in range relatively often, we can use a base pointer to allow access // from the other direction like the SP normally works. + // Furthermore, if both variable sized objects are present, and the + // stack needs to be dynamically re-aligned, the base pointer is the only + // reliable way to reference the locals. if (MFI->hasVarSizedObjects()) { + if (needsStackRealignment(MF)) + return true; // Conservatively estimate whether the negative offset from the frame // pointer will be sufficient to reach. If a function has a smallish // frame, it's less likely to have lots of spills and callee saved @@ -179,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; } +bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const { + + if (MF.getFunction()->hasFnAttribute("no-realign-stack")) + return false; + + return true; +} + +// FIXME: share this with other backends with identical implementation? +bool +AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *F = MF.getFunction(); + unsigned StackAlign = MF.getTarget() + .getSubtargetImpl(*MF.getFunction()) + ->getFrameLowering() + ->getStackAlignment(); + bool requiresRealignment = + ((MFI->getMaxAlignment() > StackAlign) || + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); + + return requiresRealignment && canRealignStack(MF); +} + unsigned AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); @@ -269,7 +301,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, // The FP is only available if there is no dynamic realignment. We // don't know for sure yet whether we'll need that, so we guess based // on whether there are any local variables that would trigger it. - if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset)) + if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, AArch64::FP, FPOffset)) return false; // If we can reference via the stack pointer or base pointer, try that. 
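The hasBasePointer/needsStackRealignment additions target the one frame shape that neither SP nor FP can address reliably: a dynamically realigned frame that also holds variable-sized objects. SP moves with every dynamic allocation and FP is displaced by the realignment, so X19 is reserved as a base pointer for the locals. A sketch of a function with that shape (alloca.h is non-standard but widely available; illustrative only):

```cpp
#include <alloca.h>
#include <cstring>

void realigned_frame_with_dynamic_alloc(unsigned n, const char *src) {
  alignas(64) char big[64];                    // > 16-byte stack alignment => realign
  char *dyn = static_cast<char *>(alloca(n));  // variable-sized object moves SP
  std::memcpy(dyn, src, n);
  std::memcpy(big, dyn, n < sizeof big ? n : sizeof big);
}
```

Note also that canRealignStack honours the "no-realign-stack" function attribute, so realignment (and with it the base pointer) can be suppressed per function.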
@@ -277,7 +309,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, // to only disallow SP relative references in the live range of // the VLA(s). In practice, it's unclear how much difference that // would make, but it may be worth doing. - if (isFrameOffsetLegal(MI, Offset)) + if (isFrameOffsetLegal(MI, AArch64::SP, Offset)) return false; // The offset likely isn't legal; we want to allocate a virtual base register. @@ -285,6 +317,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, } bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + unsigned BaseReg, int64_t Offset) const { assert(Offset <= INT_MAX && "Offset too big to fit in int."); assert(MI && "Unable to get the legal offset for nil instruction."); @@ -302,10 +335,11 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, DebugLoc DL; // Defaults to "unknown" if (Ins != MBB->end()) DL = Ins->getDebugLoc(); - + const MachineFunction &MF = *MBB->getParent(); + const AArch64InstrInfo *TII = + MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); const MCInstrDesc &MCID = TII->get(AArch64::ADDXri); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const MachineFunction &MF = *MBB->getParent(); MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF)); unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); @@ -324,6 +358,9 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } + const MachineFunction *MF = MI.getParent()->getParent(); + const AArch64InstrInfo *TII = + MF->getSubtarget<AArch64Subtarget>().getInstrInfo(); bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII); assert(Done && "Unable to resolve frame index!"); (void)Done; @@ -337,6 +374,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const AArch64InstrInfo *TII = + MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>( MF.getSubtarget().getFrameLowering()); @@ -389,10 +428,10 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case AArch64::GPR64RegClassID: case AArch64::GPR32commonRegClassID: case AArch64::GPR64commonRegClassID: - return 32 - 1 // XZR/SP - - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP - - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register - - hasBasePointer(MF); // X19 + return 32 - 1 // XZR/SP + - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP + - (TT.isOSDarwin() || ReserveX18) // X18 reserved as platform register + - hasBasePointer(MF); // X19 case AArch64::FPR8RegClassID: case AArch64::FPR16RegClassID: case AArch64::FPR32RegClassID: diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h index 51a5034..8c379d9 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -19,26 +19,24 @@ namespace llvm { -class AArch64InstrInfo; -class AArch64Subtarget; class MachineFunction; class RegScavenger; class TargetRegisterClass; +class Triple; struct AArch64RegisterInfo : public AArch64GenRegisterInfo { private: - const AArch64InstrInfo *TII; - const AArch64Subtarget *STI; + const Triple &TT; public: - AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti); + 
AArch64RegisterInfo(const Triple &TT); bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; /// Code Generation virtual methods... - const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; unsigned getCSRFirstUseCost() const override { // The cost will be compared against BlockFrequency where entry has the @@ -59,7 +57,8 @@ public: /// /// Should return NULL in the case that the calling convention does not have /// this property - const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; + const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF, + CallingConv::ID) const; BitVector getReservedRegs(const MachineFunction &MF) const override; const TargetRegisterClass * @@ -73,7 +72,7 @@ public: bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; - bool isFrameOffsetLegal(const MachineInstr *MI, + bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const override; void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, @@ -94,6 +93,9 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; + // Base pointer (stack realignment) support. + bool canRealignStack(const MachineFunction &MF) const; + bool needsStackRealignment(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td index 3ec4157..ca4457a 100644 --- a/lib/Target/AArch64/AArch64SchedA57.td +++ b/lib/Target/AArch64/AArch64SchedA57.td @@ -60,7 +60,12 @@ include "AArch64SchedA57WriteRes.td" // Cortex-A57. The Cortex-A57 types are directly associated with resources, so // defining the aliases precludes the need for mapping them using WriteRes. The // aliases are sufficient for creating a coarse, working model. As the model -// evolves, InstRWs will be used to override these SchedAliases. +// evolves, InstRWs will be used to override some of these SchedAliases. +// +// WARNING: Using SchedAliases is convenient and works well for latency and +// resource lookup for instructions. However, this creates an entry in +// AArch64WriteLatencyTable with a WriteResourceID of 0, breaking +// any SchedReadAdvance since the lookup will fail. 
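// For instance, a ReadAdvance keyed on an aliased write such as WriteIM32
// would resolve against that zero WriteResourceID and never apply; that is
// presumably why WriteIM32/WriteIM64 below get explicit WriteRes entries
// rather than SchedAliases.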
def : SchedAlias<WriteImm, A57Write_1cyc_1I>; def : SchedAlias<WriteI, A57Write_1cyc_1I>; @@ -70,8 +75,8 @@ def : SchedAlias<WriteExtr, A57Write_1cyc_1I>; def : SchedAlias<WriteIS, A57Write_1cyc_1I>; def : SchedAlias<WriteID32, A57Write_19cyc_1M>; def : SchedAlias<WriteID64, A57Write_35cyc_1M>; -def : SchedAlias<WriteIM32, A57Write_3cyc_1M>; -def : SchedAlias<WriteIM64, A57Write_5cyc_1M>; +def : WriteRes<WriteIM32, [A57UnitM]> { let Latency = 3; } +def : WriteRes<WriteIM64, [A57UnitM]> { let Latency = 5; } def : SchedAlias<WriteBr, A57Write_1cyc_1B>; def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>; def : SchedAlias<WriteLD, A57Write_4cyc_1L>; @@ -127,6 +132,15 @@ def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>; def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>; +// Shifted Register with Shift == 0 +// ---------------------------------------------------------------------------- + +def A57WriteISReg : SchedWriteVariant<[ + SchedVar<RegShiftedPred, [WriteISReg]>, + SchedVar<NoSchedPred, [WriteI]>]>; +def : InstRW<[A57WriteISReg], (instregex ".*rs$")>; + + // Divide and Multiply Instructions // ----------------------------------------------------------------------------- diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 0cfd582..b9c5399 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -28,15 +28,14 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size); + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget<AArch64Subtarget>(); const char *bzeroEntry = - (V && V->isNullValue()) - ? DAG.getTarget().getSubtarget<AArch64Subtarget>().getBZeroEntry() - : nullptr; + (V && V->isNullValue()) ? STI.getBZeroEntry() : nullptr; // For small size (< 256), it is not beneficial to use bzero // instead of memset. 
if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) { - const AArch64TargetLowering &TLI = - *DAG.getTarget().getSubtarget<AArch64Subtarget>().getTargetLowering(); + const AArch64TargetLowering &TLI = *STI.getTargetLowering(); EVT IntPtr = TLI.getPointerTy(); Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp index 0c36e8f..85b44a2 100644 --- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp +++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -30,7 +30,6 @@ class AArch64StorePairSuppress : public MachineFunctionPass { const AArch64InstrInfo *TII; const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; - MachineFunction *MF; TargetSchedModel SchedModel; MachineTraceMetrics *Traces; MachineTraceMetrics::Ensemble *MinInstr; @@ -115,20 +114,16 @@ bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { } } -bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { - MF = &mf; - TII = - static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo()); - TRI = MF->getSubtarget().getRegisterInfo(); - MRI = &MF->getRegInfo(); - const TargetSubtargetInfo &ST = - MF->getTarget().getSubtarget<TargetSubtargetInfo>(); +bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) { + const TargetSubtargetInfo &ST = MF.getSubtarget(); + TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo()); + TRI = ST.getRegisterInfo(); + MRI = &MF.getRegInfo(); SchedModel.init(ST.getSchedModel(), &ST, TII); - Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = nullptr; - DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n'); + DEBUG(dbgs() << "*** " << getPassName() << ": " << MF.getName() << '\n'); if (!SchedModel.hasInstrSchedModel()) { DEBUG(dbgs() << " Skipping pass: no machine model present.\n"); @@ -139,7 +134,7 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { // precisely determine whether a store pair can be formed. But we do want to // filter out most situations where we can't form store pairs to avoid // computing trace metrics in those cases. - for (auto &MBB : *MF) { + for (auto &MBB : MF) { bool SuppressSTP = false; unsigned PrevBaseReg = 0; for (auto &MI : MBB) { diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 47b5d54..0b97af8 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -47,18 +47,12 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT, const std::string &FS, const TargetMachine &TM, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), + HasV8_1aOps(false), HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), - HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU), - TargetTriple(TT), - // This nested ternary is horrible, but DL needs to be properly - // initialized - // before TLInfo is constructed. - DL(isTargetMachO() - ? "e-m:o-i64:64-i128:128-n32:64-S128" - : (LittleEndian ? 
"e-m:e-i64:64-i128:128-n32:64-S128" - : "E-m:e-i64:64-i128:128-n32:64-S128")), - FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), - TSInfo(&DL), TLInfo(TM) {} + HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), + IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), + InstrInfo(initializeSubtargetDependencies(FS)), + TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} /// ClassifyGlobalReference - Find the target operand flags that describe /// how a global value should be referenced for the current subtarget. diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index e2740f1..5454b20 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -37,6 +37,8 @@ protected: /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. ARMProcFamilyEnum ARMProcFamily; + bool HasV8_1aOps; + bool HasFPARMv8; bool HasNEON; bool HasCrypto; @@ -48,13 +50,14 @@ protected: // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. bool HasZeroCycleZeroing; + bool IsLittle; + /// CPUString - String name of used CPU. std::string CPUString; /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; - const DataLayout DL; AArch64FrameLowering FrameLowering; AArch64InstrInfo InstrInfo; AArch64SelectionDAGInfo TSInfo; @@ -82,15 +85,17 @@ public: return &TLInfo; } const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; } - const DataLayout *getDataLayout() const override { return &DL; } const AArch64RegisterInfo *getRegisterInfo() const override { return &getInstrInfo()->getRegisterInfo(); } + const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } bool enablePostMachineScheduler() const override { return isCortexA53() || isCortexA57(); } + bool hasV8_1aOps() const { return HasV8_1aOps; } + bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } @@ -100,7 +105,7 @@ public: bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } - bool isLittleEndian() const { return DL.isLittleEndian(); } + bool isLittleEndian() const { return IsLittle; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetIOS() const { return TargetTriple.isiOS(); } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index d4f19d2..a9059ab 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -13,10 +13,11 @@ #include "AArch64.h" #include "AArch64TargetMachine.h" #include "AArch64TargetObjectFile.h" +#include "AArch64TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/IR/Function.h" -#include "llvm/PassManager.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" @@ -84,7 +85,12 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden, static cl::opt<bool> EnableGEPOpt("aarch64-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), - cl::init(true)); + cl::init(false)); + +// FIXME: Unify control over GlobalMerge. 
+static cl::opt<cl::boolOrDefault> +EnableGlobalMerge("aarch64-global-merge", cl::Hidden, + cl::desc("Enable the global merge pass")); extern "C" void LLVMInitializeAArch64Target() { // Register the target. @@ -103,6 +109,16 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { return make_unique<AArch64_ELFTargetObjectFile>(); } +// Helper function to build a DataLayout string +static std::string computeDataLayout(StringRef TT, bool LittleEndian) { + Triple Triple(TT); + if (Triple.isOSBinFormatMachO()) + return "e-m:o-i64:64-i128:128-n32:64-S128"; + if (LittleEndian) + return "e-m:e-i64:64-i128:128-n32:64-S128"; + return "E-m:e-i64:64-i128:128-n32:64-S128"; +} + /// TargetMachine ctor - Create an AArch64 architecture model. /// AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, @@ -111,9 +127,12 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool LittleEndian) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + // This nested ternary is horrible, but DL needs to be properly + // initialized before TLInfo is constructed. + : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS, + Options, RM, CM, OL), TLOF(createTLOF(Triple(getTargetTriple()))), - Subtarget(TT, CPU, FS, *this, LittleEndian), isLittle(LittleEndian) { + isLittle(LittleEndian) { initAsmInfo(); } @@ -121,11 +140,8 @@ AArch64TargetMachine::~AArch64TargetMachine() {} const AArch64Subtarget * AArch64TargetMachine::getSubtargetImpl(const Function &F) const { - AttributeSet FnAttrs = F.getAttributes(); - Attribute CPUAttr = - FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu"); - Attribute FSAttr = - FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); + Attribute CPUAttr = F.getFnAttribute("target-cpu"); + Attribute FSAttr = F.getFnAttribute("target-features"); std::string CPU = !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString().str() @@ -188,12 +204,10 @@ public: }; } // namespace -void AArch64TargetMachine::addAnalysisPasses(PassManagerBase &PM) { - // Add first the target-independent BasicTTI pass, then our AArch64 pass. This - // allows the AArch64 pass to delegate to the target independent layer when - // appropriate. - PM.add(createBasicTargetTransformInfoPass(this)); - PM.add(createAArch64TargetTransformInfoPass(this)); +TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(AArch64TTIImpl(this, F)); + }); } TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { @@ -233,8 +247,13 @@ bool AArch64PassConfig::addPreISel() { // get a chance to be merged if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant) addPass(createAArch64PromoteConstantPass()); - if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createGlobalMergePass(TM)); + // FIXME: On AArch64, this depends on the type. + // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(), + // and the offset has to be a multiple of the related size in bytes. 
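// Note: a cl::boolOrDefault flag has three states (cl::BOU_TRUE,
// cl::BOU_FALSE, cl::BOU_UNSET); the check below turns GlobalMerge on by
// default only at -O3 (CodeGenOpt::Aggressive) while -aarch64-global-merge
// can still force it either way. On the 4095 bound: LDR/STR unsigned
// immediates are scaled by the access size, so a 4-byte element is reachable
// at byte offsets 0, 4, ..., 4095 * 4 = 16380 and an 8-byte element up to
// 4095 * 8 = 32760; 4095 is the bound that stays safe even for byte-sized
// accesses, which is what the FIXME above alludes to.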
+ if ((TM->getOptLevel() == CodeGenOpt::Aggressive && + EnableGlobalMerge == cl::BOU_UNSET) || + EnableGlobalMerge == cl::BOU_TRUE) + addPass(createGlobalMergePass(TM, 4095)); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64AddressTypePromotionPass()); @@ -246,7 +265,7 @@ bool AArch64PassConfig::addInstSelector() { // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many // references to _TLS_MODULE_BASE_ as possible. - if (TM->getSubtarget<AArch64Subtarget>().isTargetELF() && + if (Triple(TM->getTargetTriple()).isOSBinFormatELF() && getOptLevel() != CodeGenOpt::None) addPass(createAArch64CleanupLocalDynamicTLSPass()); @@ -281,10 +300,7 @@ void AArch64PassConfig::addPostRegAlloc() { // Change dead register definitions to refer to the zero register. if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination) addPass(createAArch64DeadRegisterDefinitions()); - if (TM->getOptLevel() != CodeGenOpt::None && - (TM->getSubtarget<AArch64Subtarget>().isCortexA53() || - TM->getSubtarget<AArch64Subtarget>().isCortexA57()) && - usingDefaultRegAlloc()) + if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc()) // Improve performance for some FP/SIMD code for A57. addPass(createAArch64A57FPLoadBalancing()); } @@ -304,6 +320,6 @@ void AArch64PassConfig::addPreEmitPass() { // range of their destination. addPass(createAArch64BranchRelaxation()); if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH && - TM->getSubtarget<AArch64Subtarget>().isTargetMachO()) + Triple(TM->getTargetTriple()).isOSBinFormatMachO()) addPass(createAArch64CollectLOHPass()); } diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h index 75c65c5..ec34fad 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.h +++ b/lib/Target/AArch64/AArch64TargetMachine.h @@ -24,7 +24,6 @@ namespace llvm { class AArch64TargetMachine : public LLVMTargetMachine { protected: std::unique_ptr<TargetLoweringObjectFile> TLOF; - AArch64Subtarget Subtarget; mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap; public: @@ -34,17 +33,13 @@ public: CodeGenOpt::Level OL, bool IsLittleEndian); ~AArch64TargetMachine() override; - - const AArch64Subtarget *getSubtargetImpl() const override { - return &Subtarget; - } const AArch64Subtarget *getSubtargetImpl(const Function &F) const override; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - /// \brief Register AArch64 analysis passes with a pass manager. - void addAnalysisPasses(PassManagerBase &PM) override; + /// \brief Get the TargetIRAnalysis for this target. 
+ TargetIRAnalysis getTargetIRAnalysis() override; TargetLoweringObjectFile* getObjFileLowering() const override { return TLOF.get(); diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 4069038..299b4a5 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Dwarf.h" using namespace llvm; using namespace dwarf; @@ -23,6 +24,11 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx, InitializeELF(TM.Options.UseInitArray); } +AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile() + : TargetLoweringObjectFileMachO() { + SupportGOTPCRelWithOffset = false; +} + const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI, @@ -35,7 +41,7 @@ const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference( const MCSymbol *Sym = TM.getSymbol(GV, Mang); const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); - MCSymbol *PCSym = getContext().CreateTempSymbol(); + MCSymbol *PCSym = getContext().createTempSymbol(); Streamer.EmitLabel(PCSym); const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); return MCBinaryExpr::CreateSub(Res, PC, getContext()); @@ -50,3 +56,18 @@ MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol( MachineModuleInfo *MMI) const { return TM.getSymbol(GV, Mang); } + +const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel( + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, + MachineModuleInfo *MMI, MCStreamer &Streamer) const { + assert((Offset+MV.getConstant() == 0) && + "AArch64 does not support GOT PC rel with extra offset"); + // On ARM64 Darwin, we can reference symbols with foo@GOT-., which + // is an indirect pc-relative reference. + const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); + MCSymbol *PCSym = getContext().createTempSymbol(); + Streamer.EmitLabel(PCSym); + const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); + return MCBinaryExpr::CreateSub(Res, PC, getContext()); +} diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h index 2e595f9..d41f445 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -24,6 +24,8 @@ class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF { /// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin. 
class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: + AArch64_MachoTargetObjectFile(); + const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, @@ -33,6 +35,11 @@ public: MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI) const override; + + const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCValue &MV, int64_t Offset, + MachineModuleInfo *MMI, + MCStreamer &Streamer) const override; }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index b1a2914..ed27cf8 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1,4 +1,4 @@ -//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===// +//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===// // // The LLVM Compiler Infrastructure // @@ -6,18 +6,12 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// \file -/// This file implements a TargetTransformInfo analysis pass specific to the -/// AArch64 target machine. It uses the target's detailed information to provide -/// more precise answers to certain TTI queries, while letting the target -/// independent and default TTI implementations handle the rest. -/// -//===----------------------------------------------------------------------===// -#include "AArch64.h" -#include "AArch64TargetMachine.h" +#include "AArch64TargetTransformInfo.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" @@ -26,130 +20,10 @@ using namespace llvm; #define DEBUG_TYPE "aarch64tti" -// Declare the pass initialization routine locally as target-specific passes -// don't have a target-wide initialization entry point, and so we rely on the -// pass constructor initialization. -namespace llvm { -void initializeAArch64TTIPass(PassRegistry &); -} - -namespace { - -class AArch64TTI final : public ImmutablePass, public TargetTransformInfo { - const AArch64TargetMachine *TM; - const AArch64Subtarget *ST; - const AArch64TargetLowering *TLI; - - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; - -public: - AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { - llvm_unreachable("This pass cannot be directly constructed"); - } - - AArch64TTI(const AArch64TargetMachine *TM) - : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), - TLI(TM->getSubtargetImpl()->getTargetLowering()) { - initializeAArch64TTIPass(*PassRegistry::getPassRegistry()); - } - - void initializePass() override { pushTTIStack(this); } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - TargetTransformInfo::getAnalysisUsage(AU); - } - - /// Pass identification. - static char ID; - - /// Provide necessary pointer adjustments for the two base classes. 
- void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo *)this; - return this; - } - - /// \name Scalar TTI Implementations - /// @{ - unsigned getIntImmCost(int64_t Val) const; - unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; - unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) const override; - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) const override; - PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; - - /// @} - - /// \name Vector TTI Implementations - /// @{ - - unsigned getNumberOfRegisters(bool Vector) const override { - if (Vector) { - if (ST->hasNEON()) - return 32; - return 0; - } - return 31; - } - - unsigned getRegisterBitWidth(bool Vector) const override { - if (Vector) { - if (ST->hasNEON()) - return 128; - return 0; - } - return 64; - } - - unsigned getMaxInterleaveFactor() const override; - - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const - override; - - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const - override; - - unsigned getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, - OperandValueKind Opd2Info = OK_AnyValue, - OperandValueProperties Opd1PropInfo = OP_None, - OperandValueProperties Opd2PropInfo = OP_None) const override; - - unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override; - - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const - override; - - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const override; - - unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override; - - void getUnrollingPreferences(const Function *F, Loop *L, - UnrollingPreferences &UP) const override; - - - /// @} -}; - -} // end anonymous namespace - -INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti", - "AArch64 Target Transform Info", true, true, false) -char AArch64TTI::ID = 0; - -ImmutablePass * -llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) { - return new AArch64TTI(TM); -} - /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. -unsigned AArch64TTI::getIntImmCost(int64_t Val) const { +unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) { // Check if the immediate can be encoded within an instruction. if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64)) return 0; @@ -163,7 +37,7 @@ unsigned AArch64TTI::getIntImmCost(int64_t Val) const { } /// \brief Calculate the cost of materializing the given constant. -unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { +unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -187,25 +61,25 @@ unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { return std::max(1U, Cost); } -unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) const { +unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); // There is no cost model for constants with a bit size of 0. 
Return TCC_Free // here, so that constant hoisting will ignore this constant. if (BitSize == 0) - return TCC_Free; + return TTI::TCC_Free; unsigned ImmIdx = ~0U; switch (Opcode) { default: - return TCC_Free; + return TTI::TCC_Free; case Instruction::GetElementPtr: // Always hoist the base address of a GetElementPtr. if (Idx == 0) - return 2 * TCC_Basic; - return TCC_Free; + return 2 * TTI::TCC_Basic; + return TTI::TCC_Free; case Instruction::Store: ImmIdx = 0; break; @@ -227,7 +101,7 @@ unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, case Instruction::LShr: case Instruction::AShr: if (Idx == 1) - return TCC_Free; + return TTI::TCC_Free; break; case Instruction::Trunc: case Instruction::ZExt: @@ -245,26 +119,27 @@ unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, if (Idx == ImmIdx) { unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); - return (Cost <= NumConstants * TCC_Basic) - ? static_cast<unsigned>(TCC_Free) : Cost; + unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); + return (Cost <= NumConstants * TTI::TCC_Basic) + ? static_cast<unsigned>(TTI::TCC_Free) + : Cost; } - return AArch64TTI::getIntImmCost(Imm, Ty); + return AArch64TTIImpl::getIntImmCost(Imm, Ty); } -unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) const { +unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); // There is no cost model for constants with a bit size of 0. Return TCC_Free // here, so that constant hoisting will ignore this constant. if (BitSize == 0) - return TCC_Free; + return TTI::TCC_Free; switch (IID) { default: - return TCC_Free; + return TTI::TCC_Free; case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: @@ -273,35 +148,36 @@ unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, case Intrinsic::umul_with_overflow: if (Idx == 1) { unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); - return (Cost <= NumConstants * TCC_Basic) - ? static_cast<unsigned>(TCC_Free) : Cost; + unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); + return (Cost <= NumConstants * TTI::TCC_Basic) + ? static_cast<unsigned>(TTI::TCC_Free) + : Cost; } break; case Intrinsic::experimental_stackmap: if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) - return TCC_Free; + return TTI::TCC_Free; break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) - return TCC_Free; + return TTI::TCC_Free; break; } - return AArch64TTI::getIntImmCost(Imm, Ty); + return AArch64TTIImpl::getIntImmCost(Imm, Ty); } -AArch64TTI::PopcntSupportKind -AArch64TTI::getPopcntSupport(unsigned TyWidth) const { +TargetTransformInfo::PopcntSupportKind +AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); if (TyWidth == 32 || TyWidth == 64) - return PSK_FastHardware; + return TTI::PSK_FastHardware; // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount. 
- return PSK_Software; + return TTI::PSK_Software; } -unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { +unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -309,7 +185,7 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, EVT DstTy = TLI->getValueType(Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src); static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = { // LowerVectorINT_TO_FP: @@ -380,11 +256,11 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, if (Idx != -1) return ConversionTbl[Idx].Cost; - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src); } -unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { +unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) { assert(Val->isVectorTy() && "This must be a vector type"); if (Index != -1U) { @@ -408,10 +284,10 @@ unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, return 2; } -unsigned AArch64TTI::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, - OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo) const { +unsigned AArch64TTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); @@ -442,8 +318,8 @@ unsigned AArch64TTI::getArithmeticInstrCost( switch (ISD) { default: - return TargetTransformInfo::getArithmeticInstrCost( - Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); case ISD::ADD: case ISD::MUL: case ISD::XOR: @@ -455,7 +331,7 @@ unsigned AArch64TTI::getArithmeticInstrCost( } } -unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { +unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting @@ -470,14 +346,14 @@ unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { return 1; } -unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { +unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // We don't lower vector selects well that are wider than the register width. if (ValTy->isVectorTy() && ISD == ISD::SELECT) { // We would need this many instructions to hide the scalarization happening. 
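// (That is, each lane of a wider-than-legal vector select is costed at
// roughly AmortizationCost scalar instructions, so the v16i1/v16i16 entry in
// the table below works out to 16 * 20 = 320.)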
- unsigned AmortizationCost = 20; + const unsigned AmortizationCost = 20; static const TypeConversionCostTblEntry<MVT::SimpleValueType> VectorSelectTbl[] = { { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost }, @@ -498,12 +374,12 @@ unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return VectorSelectTbl[Idx].Cost; } } - return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { +unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) { std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 && @@ -531,7 +407,7 @@ unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src, return LT.first; } -unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const { +unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { unsigned Cost = 0; for (auto *I : Tys) { if (!I->isVectorTy()) @@ -543,14 +419,103 @@ unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const { return Cost; } -unsigned AArch64TTI::getMaxInterleaveFactor() const { +unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) { if (ST->isCortexA57()) return 4; return 2; } -void AArch64TTI::getUnrollingPreferences(const Function *F, Loop *L, - UnrollingPreferences &UP) const { +void AArch64TTIImpl::getUnrollingPreferences(Loop *L, + TTI::UnrollingPreferences &UP) { + // Enable partial unrolling and runtime unrolling. + BaseT::getUnrollingPreferences(L, UP); + + // For inner loop, it is more likely to be a hot one, and the runtime check + // can be promoted out from LICM pass, so the overhead is less, let's try + // a larger threshold to unroll more loops. + if (L->getLoopDepth() > 1) + UP.PartialThreshold *= 2; + // Disable partial & runtime unrolling on -Os. 
UP.PartialOptSizeThreshold = 0; } + +Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, + Type *ExpectedType) { + switch (Inst->getIntrinsicID()) { + default: + return nullptr; + case Intrinsic::aarch64_neon_st2: + case Intrinsic::aarch64_neon_st3: + case Intrinsic::aarch64_neon_st4: { + // Create a struct type + StructType *ST = dyn_cast<StructType>(ExpectedType); + if (!ST) + return nullptr; + unsigned NumElts = Inst->getNumArgOperands() - 1; + if (ST->getNumElements() != NumElts) + return nullptr; + for (unsigned i = 0, e = NumElts; i != e; ++i) { + if (Inst->getArgOperand(i)->getType() != ST->getElementType(i)) + return nullptr; + } + Value *Res = UndefValue::get(ExpectedType); + IRBuilder<> Builder(Inst); + for (unsigned i = 0, e = NumElts; i != e; ++i) { + Value *L = Inst->getArgOperand(i); + Res = Builder.CreateInsertValue(Res, L, i); + } + return Res; + } + case Intrinsic::aarch64_neon_ld2: + case Intrinsic::aarch64_neon_ld3: + case Intrinsic::aarch64_neon_ld4: + if (Inst->getType() == ExpectedType) + return Inst; + return nullptr; + } +} + +bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, + MemIntrinsicInfo &Info) { + switch (Inst->getIntrinsicID()) { + default: + break; + case Intrinsic::aarch64_neon_ld2: + case Intrinsic::aarch64_neon_ld3: + case Intrinsic::aarch64_neon_ld4: + Info.ReadMem = true; + Info.WriteMem = false; + Info.Vol = false; + Info.NumMemRefs = 1; + Info.PtrVal = Inst->getArgOperand(0); + break; + case Intrinsic::aarch64_neon_st2: + case Intrinsic::aarch64_neon_st3: + case Intrinsic::aarch64_neon_st4: + Info.ReadMem = false; + Info.WriteMem = true; + Info.Vol = false; + Info.NumMemRefs = 1; + Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1); + break; + } + + switch (Inst->getIntrinsicID()) { + default: + return false; + case Intrinsic::aarch64_neon_ld2: + case Intrinsic::aarch64_neon_st2: + Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS; + break; + case Intrinsic::aarch64_neon_ld3: + case Intrinsic::aarch64_neon_st3: + Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS; + break; + case Intrinsic::aarch64_neon_ld4: + case Intrinsic::aarch64_neon_st4: + Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS; + break; + } + return true; +} diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h new file mode 100644 index 0000000..25c22bc --- /dev/null +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -0,0 +1,147 @@ +//===-- AArch64TargetTransformInfo.h - AArch64 specific TTI -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines a TargetTransformInfo::Concept conforming object specific to the +/// AArch64 target machine. It uses the target's detailed information to +/// provide more precise answers to certain TTI queries, while letting the +/// target independent and default TTI implementations handle the rest. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H + +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Target/TargetLowering.h" +#include <algorithm> + +namespace llvm { + +class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> { + typedef BasicTTIImplBase<AArch64TTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const AArch64TargetMachine *TM; + const AArch64Subtarget *ST; + const AArch64TargetLowering *TLI; + + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); + + const AArch64Subtarget *getST() const { return ST; } + const AArch64TargetLowering *getTLI() const { return TLI; } + + enum MemIntrinsicType { + VECTOR_LDST_TWO_ELEMENTS, + VECTOR_LDST_THREE_ELEMENTS, + VECTOR_LDST_FOUR_ELEMENTS + }; + +public: + explicit AArch64TTIImpl(const AArch64TargetMachine *TM, Function &F) + : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + AArch64TTIImpl(const AArch64TTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST), + TLI(Arg.TLI) {} + AArch64TTIImpl(AArch64TTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)), + ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} + AArch64TTIImpl &operator=(const AArch64TTIImpl &RHS) { + BaseT::operator=(static_cast<const BaseT &>(RHS)); + TM = RHS.TM; + ST = RHS.ST; + TLI = RHS.TLI; + return *this; + } + AArch64TTIImpl &operator=(AArch64TTIImpl &&RHS) { + BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); + TM = std::move(RHS.TM); + ST = std::move(RHS.ST); + TLI = std::move(RHS.TLI); + return *this; + } + + /// \name Scalar TTI Implementations + /// @{ + + using BaseT::getIntImmCost; + unsigned getIntImmCost(int64_t Val); + unsigned getIntImmCost(const APInt &Imm, Type *Ty); + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty); + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool Vector) { + if (Vector) { + if (ST->hasNEON()) + return 32; + return 0; + } + return 31; + } + + unsigned getRegisterBitWidth(bool Vector) { + if (Vector) { + if (ST->hasNEON()) + return 128; + return 0; + } + return 64; + } + + unsigned getMaxInterleaveFactor(unsigned VF); + + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); + + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + + unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); + + unsigned getAddressComputationCost(Type *Ty, bool IsComplex); + + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned 
Alignment, + unsigned AddressSpace); + + unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys); + + void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + + Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, + Type *ExpectedType); + + bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info); + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 8eb906b..38d34e6 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -9,6 +9,7 @@ #include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64MCExpr.h" +#include "MCTargetDesc/AArch64TargetStreamer.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/STLExtras.h" @@ -113,11 +114,10 @@ public: #define GET_OPERAND_DIAGNOSTIC_TYPES #include "AArch64GenAsmMatcher.inc" }; - AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI) { - MCAsmParserExtension::Initialize(_Parser); + AArch64AsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(), STI(STI) { + MCAsmParserExtension::Initialize(Parser); MCStreamer &S = getParser().getStreamer(); if (S.getTargetStreamer() == nullptr) new AArch64TargetStreamer(S); @@ -205,14 +205,16 @@ private: struct BarrierOp { unsigned Val; // Not the enum since not all values have names. + const char *Data; + unsigned Length; }; struct SysRegOp { const char *Data; unsigned Length; - uint64_t FeatureBits; // We need to pass through information about which - // core we are compiling for so that the SysReg - // Mappers can appropriately conditionalize. 
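// (The three fields that follow presumably cache the resolved MRS/MSR/PState
// encodings when the operand is parsed, with -1U marking "not valid in that
// role"; compare isMRSSystemRegister() and friends further down, which now
// test these cached values instead of re-running the FeatureBits-keyed
// Mapper lookups.)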
+ uint32_t MRSReg; + uint32_t MSRReg; + uint32_t PStateField; }; struct SysCRImmOp { @@ -221,6 +223,8 @@ private: struct PrefetchOp { unsigned Val; + const char *Data; + unsigned Length; }; struct ShiftExtendOp { @@ -254,8 +258,7 @@ private: MCContext &Ctx; public: - AArch64Operand(KindTy K, MCContext &_Ctx) - : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {} + AArch64Operand(KindTy K, MCContext &Ctx) : Kind(K), Ctx(Ctx) {} AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { Kind = o.Kind; @@ -349,6 +352,11 @@ public: return Barrier.Val; } + StringRef getBarrierName() const { + assert(Kind == k_Barrier && "Invalid access!"); + return StringRef(Barrier.Data, Barrier.Length); + } + unsigned getReg() const override { assert(Kind == k_Register && "Invalid access!"); return Reg.RegNum; @@ -374,11 +382,6 @@ public: return StringRef(SysReg.Data, SysReg.Length); } - uint64_t getSysRegFeatureBits() const { - assert(Kind == k_SysReg && "Invalid access!"); - return SysReg.FeatureBits; - } - unsigned getSysCR() const { assert(Kind == k_SysCR && "Invalid access!"); return SysCRImm.Val; @@ -389,6 +392,11 @@ public: return Prefetch.Val; } + StringRef getPrefetchName() const { + assert(Kind == k_Prefetch && "Invalid access!"); + return StringRef(Prefetch.Data, Prefetch.Length); + } + AArch64_AM::ShiftExtendType getShiftExtendType() const { assert(Kind == k_ShiftExtend && "Invalid access!"); return ShiftExtend.Type; @@ -757,58 +765,47 @@ public: } bool isMovZSymbolG3() const { - static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 }; - return isMovWSymbol(Variants); + return isMovWSymbol(AArch64MCExpr::VK_ABS_G3); } bool isMovZSymbolG2() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S, - AArch64MCExpr::VK_TPREL_G2, AArch64MCExpr::VK_DTPREL_G2}; - return isMovWSymbol(Variants); + return isMovWSymbol({AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S, + AArch64MCExpr::VK_TPREL_G2, + AArch64MCExpr::VK_DTPREL_G2}); } bool isMovZSymbolG1() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, + return isMovWSymbol({ + AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1, AArch64MCExpr::VK_DTPREL_G1, - }; - return isMovWSymbol(Variants); + }); } bool isMovZSymbolG0() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, - AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_DTPREL_G0}; - return isMovWSymbol(Variants); + return isMovWSymbol({AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, + AArch64MCExpr::VK_TPREL_G0, + AArch64MCExpr::VK_DTPREL_G0}); } bool isMovKSymbolG3() const { - static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 }; - return isMovWSymbol(Variants); + return isMovWSymbol(AArch64MCExpr::VK_ABS_G3); } bool isMovKSymbolG2() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G2_NC}; - return isMovWSymbol(Variants); + return isMovWSymbol(AArch64MCExpr::VK_ABS_G2_NC); } bool isMovKSymbolG1() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_TPREL_G1_NC, - AArch64MCExpr::VK_DTPREL_G1_NC - }; - return isMovWSymbol(Variants); + return isMovWSymbol({AArch64MCExpr::VK_ABS_G1_NC, + AArch64MCExpr::VK_TPREL_G1_NC, + AArch64MCExpr::VK_DTPREL_G1_NC}); } bool isMovKSymbolG0() const { - static 
AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, - AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC - }; - return isMovWSymbol(Variants); + return isMovWSymbol( + {AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, + AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC}); } template<int RegWidth, int Shift> @@ -855,28 +852,17 @@ public: bool isMRSSystemRegister() const { if (!isSysReg()) return false; - bool IsKnownRegister; - auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits()); - Mapper.fromString(getSysReg(), IsKnownRegister); - - return IsKnownRegister; + return SysReg.MRSReg != -1U; } bool isMSRSystemRegister() const { if (!isSysReg()) return false; - bool IsKnownRegister; - auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits()); - Mapper.fromString(getSysReg(), IsKnownRegister); - - return IsKnownRegister; + return SysReg.MSRReg != -1U; } bool isSystemPStateField() const { if (!isSysReg()) return false; - bool IsKnownRegister; - AArch64PState::PStateMapper().fromString(getSysReg(), IsKnownRegister); - - return IsKnownRegister; + return SysReg.PStateField != -1U; } bool isReg() const override { return Kind == k_Register && !Reg.isVector; } bool isVectorReg() const { return Kind == k_Register && Reg.isVector; } @@ -1113,16 +1099,16 @@ public: void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. if (!Expr) - Inst.addOperand(MCOperand::CreateImm(0)); + Inst.addOperand(MCOperand::createImm(0)); else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + Inst.addOperand(MCOperand::createImm(CE->getValue())); else - Inst.addOperand(MCOperand::CreateExpr(Expr)); + Inst.addOperand(MCOperand::createExpr(Expr)); } void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getReg())); + Inst.addOperand(MCOperand::createReg(getReg())); } void addGPR32as64Operands(MCInst &Inst, unsigned N) const { @@ -1134,26 +1120,26 @@ public: uint32_t Reg = RI->getRegClass(AArch64::GPR32RegClassID).getRegister( RI->getEncodingValue(getReg())); - Inst.addOperand(MCOperand::CreateReg(Reg)); + Inst.addOperand(MCOperand::createReg(Reg)); } void addVectorReg64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); assert( AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg())); - Inst.addOperand(MCOperand::CreateReg(AArch64::D0 + getReg() - AArch64::Q0)); + Inst.addOperand(MCOperand::createReg(AArch64::D0 + getReg() - AArch64::Q0)); } void addVectorReg128Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); assert( AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg())); - Inst.addOperand(MCOperand::CreateReg(getReg())); + Inst.addOperand(MCOperand::createReg(getReg())); } void addVectorRegLoOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getReg())); + Inst.addOperand(MCOperand::createReg(getReg())); } template <unsigned NumRegs> @@ -1164,7 +1150,7 @@ public: unsigned FirstReg = FirstRegs[NumRegs - 1]; Inst.addOperand( - MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0)); + MCOperand::createReg(FirstReg + getVectorListStart() - AArch64::Q0)); } template <unsigned NumRegs> @@ -1175,32 +1161,32 @@ public: 
unsigned FirstReg = FirstRegs[NumRegs - 1]; Inst.addOperand( - MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0)); + MCOperand::createReg(FirstReg + getVectorListStart() - AArch64::Q0)); } void addVectorIndex1Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + Inst.addOperand(MCOperand::createImm(getVectorIndex())); } void addVectorIndexBOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + Inst.addOperand(MCOperand::createImm(getVectorIndex())); } void addVectorIndexHOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + Inst.addOperand(MCOperand::createImm(getVectorIndex())); } void addVectorIndexSOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + Inst.addOperand(MCOperand::createImm(getVectorIndex())); } void addVectorIndexDOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + Inst.addOperand(MCOperand::createImm(getVectorIndex())); } void addImmOperands(MCInst &Inst, unsigned N) const { @@ -1215,16 +1201,16 @@ public: assert(N == 2 && "Invalid number of operands!"); if (isShiftedImm()) { addExpr(Inst, getShiftedImmVal()); - Inst.addOperand(MCOperand::CreateImm(getShiftedImmShift())); + Inst.addOperand(MCOperand::createImm(getShiftedImmShift())); } else { addExpr(Inst, getImm()); - Inst.addOperand(MCOperand::CreateImm(0)); + Inst.addOperand(MCOperand::createImm(0)); } } void addCondCodeOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getCondCode())); + Inst.addOperand(MCOperand::createImm(getCondCode())); } void addAdrpLabelOperands(MCInst &Inst, unsigned N) const { @@ -1233,7 +1219,7 @@ public: if (!MCE) addExpr(Inst, getImm()); else - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 12)); + Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 12)); } void addAdrLabelOperands(MCInst &Inst, unsigned N) const { @@ -1246,119 +1232,119 @@ public: const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); if (!MCE) { - Inst.addOperand(MCOperand::CreateExpr(getImm())); + Inst.addOperand(MCOperand::createExpr(getImm())); return; } - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / Scale)); + Inst.addOperand(MCOperand::createImm(MCE->getValue() / Scale)); } void addSImm9Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addSImm7s4Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 4)); + Inst.addOperand(MCOperand::createImm(MCE->getValue() / 4)); } void addSImm7s8Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 8)); + 
Inst.addOperand(MCOperand::createImm(MCE->getValue() / 8)); } void addSImm7s16Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 16)); + Inst.addOperand(MCOperand::createImm(MCE->getValue() / 16)); } void addImm0_7Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addImm1_8Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addImm0_15Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addImm1_16Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addImm0_31Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addImm1_31Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addImm1_32Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addImm0_63Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addImm1_63Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addImm1_64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addImm0_127Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void 
addImm0_255Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addImm0_65535Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addImm32_63Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); } void addLogicalImm32Operands(MCInst &Inst, unsigned N) const { @@ -1366,14 +1352,14 @@ public: const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue() & 0xFFFFFFFF, 32); - Inst.addOperand(MCOperand::CreateImm(encoding)); + Inst.addOperand(MCOperand::createImm(encoding)); } void addLogicalImm64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 64); - Inst.addOperand(MCOperand::CreateImm(encoding)); + Inst.addOperand(MCOperand::createImm(encoding)); } void addLogicalImm32NotOperands(MCInst &Inst, unsigned N) const { @@ -1381,7 +1367,7 @@ public: const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); int64_t Val = ~MCE->getValue() & 0xFFFFFFFF; uint64_t encoding = AArch64_AM::encodeLogicalImmediate(Val, 32); - Inst.addOperand(MCOperand::CreateImm(encoding)); + Inst.addOperand(MCOperand::createImm(encoding)); } void addLogicalImm64NotOperands(MCInst &Inst, unsigned N) const { @@ -1389,14 +1375,14 @@ public: const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); uint64_t encoding = AArch64_AM::encodeLogicalImmediate(~MCE->getValue(), 64); - Inst.addOperand(MCOperand::CreateImm(encoding)); + Inst.addOperand(MCOperand::createImm(encoding)); } void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); uint64_t encoding = AArch64_AM::encodeAdvSIMDModImmType10(MCE->getValue()); - Inst.addOperand(MCOperand::CreateImm(encoding)); + Inst.addOperand(MCOperand::createImm(encoding)); } void addBranchTarget26Operands(MCInst &Inst, unsigned N) const { @@ -1410,7 +1396,7 @@ public: return; } assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); + Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2)); } void addPCRelLabel19Operands(MCInst &Inst, unsigned N) const { @@ -1424,7 +1410,7 @@ public: return; } assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); + Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2)); } void addBranchTarget14Operands(MCInst &Inst, unsigned N) const { @@ -1438,64 +1424,52 @@ public: return; } assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); + Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2)); } void addFPImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && 
"Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getFPImm())); + Inst.addOperand(MCOperand::createImm(getFPImm())); } void addBarrierOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getBarrier())); + Inst.addOperand(MCOperand::createImm(getBarrier())); } void addMRSSystemRegisterOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - bool Valid; - auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits()); - uint32_t Bits = Mapper.fromString(getSysReg(), Valid); - - Inst.addOperand(MCOperand::CreateImm(Bits)); + Inst.addOperand(MCOperand::createImm(SysReg.MRSReg)); } void addMSRSystemRegisterOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - bool Valid; - auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits()); - uint32_t Bits = Mapper.fromString(getSysReg(), Valid); - - Inst.addOperand(MCOperand::CreateImm(Bits)); + Inst.addOperand(MCOperand::createImm(SysReg.MSRReg)); } void addSystemPStateFieldOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - bool Valid; - uint32_t Bits = - AArch64PState::PStateMapper().fromString(getSysReg(), Valid); - - Inst.addOperand(MCOperand::CreateImm(Bits)); + Inst.addOperand(MCOperand::createImm(SysReg.PStateField)); } void addSysCROperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getSysCR())); + Inst.addOperand(MCOperand::createImm(getSysCR())); } void addPrefetchOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getPrefetch())); + Inst.addOperand(MCOperand::createImm(getPrefetch())); } void addShifterOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); unsigned Imm = AArch64_AM::getShifterImm(getShiftExtendType(), getShiftExtendAmount()); - Inst.addOperand(MCOperand::CreateImm(Imm)); + Inst.addOperand(MCOperand::createImm(Imm)); } void addExtendOperands(MCInst &Inst, unsigned N) const { @@ -1503,7 +1477,7 @@ public: AArch64_AM::ShiftExtendType ET = getShiftExtendType(); if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTW; unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount()); - Inst.addOperand(MCOperand::CreateImm(Imm)); + Inst.addOperand(MCOperand::createImm(Imm)); } void addExtend64Operands(MCInst &Inst, unsigned N) const { @@ -1511,15 +1485,15 @@ public: AArch64_AM::ShiftExtendType ET = getShiftExtendType(); if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTX; unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount()); - Inst.addOperand(MCOperand::CreateImm(Imm)); + Inst.addOperand(MCOperand::createImm(Imm)); } void addMemExtendOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); AArch64_AM::ShiftExtendType ET = getShiftExtendType(); bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX; - Inst.addOperand(MCOperand::CreateImm(IsSigned)); - Inst.addOperand(MCOperand::CreateImm(getShiftExtendAmount() != 0)); + Inst.addOperand(MCOperand::createImm(IsSigned)); + Inst.addOperand(MCOperand::createImm(getShiftExtendAmount() != 0)); } // For 8-bit load/store instructions with a register offset, both the @@ -1530,8 +1504,8 @@ public: assert(N == 2 && "Invalid number of operands!"); AArch64_AM::ShiftExtendType ET = getShiftExtendType(); bool IsSigned = ET == 
AArch64_AM::SXTW || ET == AArch64_AM::SXTX; - Inst.addOperand(MCOperand::CreateImm(IsSigned)); - Inst.addOperand(MCOperand::CreateImm(hasShiftExtendAmount())); + Inst.addOperand(MCOperand::createImm(IsSigned)); + Inst.addOperand(MCOperand::createImm(hasShiftExtendAmount())); } template<int Shift> @@ -1540,7 +1514,7 @@ public: const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); uint64_t Value = CE->getValue(); - Inst.addOperand(MCOperand::CreateImm((Value >> Shift) & 0xffff)); + Inst.addOperand(MCOperand::createImm((Value >> Shift) & 0xffff)); } template<int Shift> @@ -1549,7 +1523,7 @@ public: const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); uint64_t Value = CE->getValue(); - Inst.addOperand(MCOperand::CreateImm((~Value >> Shift) & 0xffff)); + Inst.addOperand(MCOperand::createImm((~Value >> Shift) & 0xffff)); } void print(raw_ostream &OS) const override; @@ -1636,21 +1610,30 @@ public: return Op; } - static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S, + static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, + StringRef Str, + SMLoc S, MCContext &Ctx) { auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx); Op->Barrier.Val = Val; + Op->Barrier.Data = Str.data(); + Op->Barrier.Length = Str.size(); Op->StartLoc = S; Op->EndLoc = S; return Op; } - static std::unique_ptr<AArch64Operand> - CreateSysReg(StringRef Str, SMLoc S, uint64_t FeatureBits, MCContext &Ctx) { + static std::unique_ptr<AArch64Operand> CreateSysReg(StringRef Str, SMLoc S, + uint32_t MRSReg, + uint32_t MSRReg, + uint32_t PStateField, + MCContext &Ctx) { auto Op = make_unique<AArch64Operand>(k_SysReg, Ctx); Op->SysReg.Data = Str.data(); Op->SysReg.Length = Str.size(); - Op->SysReg.FeatureBits = FeatureBits; + Op->SysReg.MRSReg = MRSReg; + Op->SysReg.MSRReg = MSRReg; + Op->SysReg.PStateField = PStateField; Op->StartLoc = S; Op->EndLoc = S; return Op; @@ -1665,10 +1648,14 @@ public: return Op; } - static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S, + static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, + StringRef Str, + SMLoc S, MCContext &Ctx) { auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx); Op->Prefetch.Val = Val; + Op->Barrier.Data = Str.data(); + Op->Barrier.Length = Str.size(); Op->StartLoc = S; Op->EndLoc = S; return Op; @@ -1696,21 +1683,20 @@ void AArch64Operand::print(raw_ostream &OS) const { << AArch64_AM::getFPImmFloat(getFPImm()) << ") >"; break; case k_Barrier: { - bool Valid; - StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid); - if (Valid) + StringRef Name = getBarrierName(); + if (!Name.empty()) OS << "<barrier " << Name << ">"; else OS << "<barrier invalid #" << getBarrier() << ">"; break; } case k_Immediate: - getImm()->print(OS); + OS << *getImm(); break; case k_ShiftedImm: { unsigned Shift = getShiftedImmShift(); OS << "<shiftedimm "; - getShiftedImmVal()->print(OS); + OS << *getShiftedImmVal(); OS << ", lsl #" << AArch64_AM::getShiftValue(Shift) << ">"; break; } @@ -1741,9 +1727,8 @@ void AArch64Operand::print(raw_ostream &OS) const { OS << "c" << getSysCR(); break; case k_Prefetch: { - bool Valid; - StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid); - if (Valid) + StringRef Name = getPrefetchName(); + if (!Name.empty()) OS << "<prfop " << Name << ">"; else OS << "<prfop invalid #" << getPrefetch() << ">"; @@ -1986,7 +1971,12 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { return MatchOperand_ParseFail; } - 
Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); + bool Valid; + auto Mapper = AArch64PRFM::PRFMMapper(); + StringRef Name = + Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name, + S, getContext())); return MatchOperand_Success; } @@ -1996,14 +1986,17 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { } bool Valid; - unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid); + auto Mapper = AArch64PRFM::PRFMMapper(); + unsigned prfop = + Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid); if (!Valid) { TokError("pre-fetch hint expected"); return MatchOperand_ParseFail; } Parser.Lex(); // Eat identifier token. - Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Tok.getString(), + S, getContext())); return MatchOperand_Success; } @@ -2100,15 +2093,16 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); if (Tok.is(AsmToken::Real)) { APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); + if (isNegative) + RealVal.changeSign(); + uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); Parser.Lex(); // Eat the token. // Check for out of range values. As an exception, we let Zero through, // as we handle that special case in post-processing before matching in // order to use the zero register for it. - if (Val == -1 && !RealVal.isZero()) { + if (Val == -1 && !RealVal.isPosZero()) { TokError("expected compatible register or floating-point constant"); return MatchOperand_ParseFail; } @@ -2605,8 +2599,12 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { Error(ExprLoc, "barrier operand out of range"); return MatchOperand_ParseFail; } - Operands.push_back( - AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext())); + bool Valid; + auto Mapper = AArch64DB::DBarrierMapper(); + StringRef Name = + Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid); + Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name, + ExprLoc, getContext())); return MatchOperand_Success; } @@ -2616,7 +2614,9 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { } bool Valid; - unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid); + auto Mapper = AArch64DB::DBarrierMapper(); + unsigned Opt = + Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid); if (!Valid) { TokError("invalid barrier option name"); return MatchOperand_ParseFail; @@ -2628,8 +2628,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { return MatchOperand_ParseFail; } - Operands.push_back( - AArch64Operand::CreateBarrier(Opt, getLoc(), getContext())); + Operands.push_back( AArch64Operand::CreateBarrier(Opt, Tok.getString(), + getLoc(), getContext())); Parser.Lex(); // Consume the option return MatchOperand_Success; @@ -2643,8 +2643,27 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { if (Tok.isNot(AsmToken::Identifier)) return MatchOperand_NoMatch; - Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), getLoc(), - STI.getFeatureBits(), getContext())); + bool IsKnown; + auto MRSMapper = AArch64SysReg::MRSMapper(); + uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), 
STI.getFeatureBits(), + IsKnown); + assert(IsKnown == (MRSReg != -1U) && + "register should be -1 if and only if it's unknown"); + + auto MSRMapper = AArch64SysReg::MSRMapper(); + uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), STI.getFeatureBits(), + IsKnown); + assert(IsKnown == (MSRReg != -1U) && + "register should be -1 if and only if it's unknown"); + + auto PStateMapper = AArch64PState::PStateMapper(); + uint32_t PStateField = + PStateMapper.fromString(Tok.getString(), STI.getFeatureBits(), IsKnown); + assert(IsKnown == (PStateField != -1U) && + "register should be -1 if and only if it's unknown"); + + Operands.push_back(AArch64Operand::CreateSysReg( + Tok.getString(), getLoc(), MRSReg, MSRReg, PStateField, getContext())); Parser.Lex(); // Eat identifier return MatchOperand_Success; @@ -3626,6 +3645,60 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, Op3.getEndLoc(), getContext()); } } + } else if (NumOperands == 4 && Tok == "bfc") { + // FIXME: Horrible hack to handle BFC->BFM alias. + AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]); + AArch64Operand LSBOp = static_cast<AArch64Operand &>(*Operands[2]); + AArch64Operand WidthOp = static_cast<AArch64Operand &>(*Operands[3]); + + if (Op1.isReg() && LSBOp.isImm() && WidthOp.isImm()) { + const MCConstantExpr *LSBCE = dyn_cast<MCConstantExpr>(LSBOp.getImm()); + const MCConstantExpr *WidthCE = dyn_cast<MCConstantExpr>(WidthOp.getImm()); + + if (LSBCE && WidthCE) { + uint64_t LSB = LSBCE->getValue(); + uint64_t Width = WidthCE->getValue(); + + uint64_t RegWidth = 0; + if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( + Op1.getReg())) + RegWidth = 64; + else + RegWidth = 32; + + if (LSB >= RegWidth) + return Error(LSBOp.getStartLoc(), + "expected integer in range [0, 31]"); + if (Width < 1 || Width > RegWidth) + return Error(WidthOp.getStartLoc(), + "expected integer in range [1, 32]"); + + uint64_t ImmR = 0; + if (RegWidth == 32) + ImmR = (32 - LSB) & 0x1f; + else + ImmR = (64 - LSB) & 0x3f; + + uint64_t ImmS = Width - 1; + + if (ImmR != 0 && ImmS >= ImmR) + return Error(WidthOp.getStartLoc(), + "requested insert overflows register"); + + const MCExpr *ImmRExpr = MCConstantExpr::Create(ImmR, getContext()); + const MCExpr *ImmSExpr = MCConstantExpr::Create(ImmS, getContext()); + Operands[0] = AArch64Operand::CreateToken( + "bfm", false, Op.getStartLoc(), getContext()); + Operands[2] = AArch64Operand::CreateReg( + RegWidth == 32 ? AArch64::WZR : AArch64::XZR, false, SMLoc(), + SMLoc(), getContext()); + Operands[3] = AArch64Operand::CreateImm( + ImmRExpr, LSBOp.getStartLoc(), LSBOp.getEndLoc(), getContext()); + Operands.emplace_back( + AArch64Operand::CreateImm(ImmSExpr, WidthOp.getStartLoc(), + WidthOp.getEndLoc(), getContext())); + } + } } else if (NumOperands == 5) { // FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and // UBFIZ -> UBFM aliases. 
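// Aside: a worked standalone example, using the same formulas as the BFC hunk
// above, of how the parser rewrites BFC into BFM. For "bfc w0, #3, #4" the
// zero register supplies the inserted bits, and (LSB, Width) map onto the BFM
// rotate/top-bit-index immediates as follows.
#include <cstdint>
#include <cstdio>
int main() {
  uint64_t LSB = 3, Width = 4, RegWidth = 32;        // bfc w0, #3, #4
  uint64_t ImmR = (RegWidth - LSB) & (RegWidth - 1); // rotate: (32-3)&31 = 29
  uint64_t ImmS = Width - 1;                         // top bit index: 3
  // The overflow check above rejects ImmR != 0 && ImmS >= ImmR; here 3 < 29,
  // so the requested insert fits the register.
  std::printf("bfm w0, wzr, #%llu, #%llu\n",
              (unsigned long long)ImmR, (unsigned long long)ImmS);
  return 0;                                          // prints "#29, #3"
}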
@@ -3657,8 +3730,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, "expected integer in range [1, 32]"); uint64_t NewOp3Val = 0; - if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains( - Op1.getReg())) + if (RegWidth == 32) NewOp3Val = (32 - Op3Val) & 0x1f; else NewOp3Val = (64 - Op3Val) & 0x3f; @@ -4033,13 +4105,13 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { if (getParser().parseIdentifier(Name)) return Error(L, "expected symbol after directive"); - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + MCSymbol *Sym = getContext().getOrCreateSymbol(Name); const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); Expr = AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_TLSDESC, getContext()); MCInst Inst; Inst.setOpcode(AArch64::TLSDESCCALL); - Inst.addOperand(MCOperand::CreateExpr(Expr)); + Inst.addOperand(MCOperand::createExpr(Expr)); getParser().getStreamer().EmitInstruction(Inst, STI); return false; @@ -4082,7 +4154,7 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { StringRef Name; if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); - Args.push_back(getContext().GetOrCreateSymbol(Name)); + Args.push_back(getContext().getOrCreateSymbol(Name)); if (Idx + 1 == NbArgs) break; @@ -4139,7 +4211,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { Parser.Lex(); // Consume the EndOfStatement auto pair = std::make_pair(IsVector, RegNum); - if (!RegisterReqs.insert(std::make_pair(Name, pair)).second) + if (RegisterReqs.insert(std::make_pair(Name, pair)).first->second != pair) Warning(L, "ignoring redefinition of register alias '" + Name + "'"); return true; diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 878e29c..a1ed703 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -221,13 +221,11 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, static MCSymbolizer * createAArch64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, - LLVMSymbolLookupCallback SymbolLookUp, - void *DisInfo, MCContext *Ctx, - MCRelocationInfo *RelInfo) { - return new llvm::AArch64ExternalSymbolizer( - *Ctx, - std::unique_ptr<MCRelocationInfo>(RelInfo), - GetOpInfo, SymbolLookUp, DisInfo); + LLVMSymbolLookupCallback SymbolLookUp, + void *DisInfo, MCContext *Ctx, + std::unique_ptr<MCRelocationInfo> &&RelInfo) { + return new llvm::AArch64ExternalSymbolizer(*Ctx, move(RelInfo), GetOpInfo, + SymbolLookUp, DisInfo); } extern "C" void LLVMInitializeAArch64Disassembler() { @@ -263,7 +261,7 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo, return Fail; unsigned Register = FPR128DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -292,7 +290,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo, return Fail; unsigned Register = FPR64DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -313,7 +311,7 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo, return Fail; unsigned Register = FPR32DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return 
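// Aside: sketch of the .req semantics change visible in the parseDirectiveReq
// hunk above. std::map::insert returns the pre-existing entry when the key is
// already present, so the new code warns only when an alias is redefined to a
// *different* (IsVector, RegNum) pair; identical redefinitions pass silently.
#include <cstdio>
#include <map>
#include <string>
#include <utility>
int main() {
  std::map<std::string, std::pair<bool, unsigned>> RegisterReqs;
  auto pair = std::make_pair(false, 5u);            // say, "foo .req w5"
  RegisterReqs.insert(std::make_pair("foo", pair)); // first definition
  if (RegisterReqs.insert(std::make_pair("foo", pair)).first->second != pair)
    std::puts("warning: ignoring redefinition of register alias 'foo'");
  else
    std::puts("identical redefinition accepted silently"); // this branch runs
  return 0;
}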
Success; } @@ -334,7 +332,7 @@ static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo, return Fail; unsigned Register = FPR16DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -355,7 +353,7 @@ static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo, return Fail; unsigned Register = FPR8DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -376,7 +374,7 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, return Fail; unsigned Register = GPR64DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -388,7 +386,7 @@ static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo, unsigned Register = GPR64DecoderTable[RegNo]; if (Register == AArch64::XZR) Register = AArch64::SP; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -409,7 +407,7 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, return Fail; unsigned Register = GPR32DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -422,7 +420,7 @@ static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo, unsigned Register = GPR32DecoderTable[RegNo]; if (Register == AArch64::WZR) Register = AArch64::WSP; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -443,7 +441,7 @@ static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo, return Fail; unsigned Register = VectorDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -463,7 +461,7 @@ static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return Fail; unsigned Register = QQDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -486,7 +484,7 @@ static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return Fail; unsigned Register = QQQDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -510,7 +508,7 @@ static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return Fail; unsigned Register = QQQQDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -530,7 +528,7 @@ static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return Fail; unsigned Register = DDDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -553,7 +551,7 @@ static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return Fail; unsigned Register = DDDDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -577,7 +575,7 @@ static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) 
return Fail; unsigned Register = DDDDDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); + Inst.addOperand(MCOperand::createReg(Register)); return Success; } @@ -586,14 +584,14 @@ static DecodeStatus DecodeFixedPointScaleImm32(llvm::MCInst &Inst, unsigned Imm, const void *Decoder) { // scale{5} is asserted as 1 in tblgen. Imm |= 0x20; - Inst.addOperand(MCOperand::CreateImm(64 - Imm)); + Inst.addOperand(MCOperand::createImm(64 - Imm)); return Success; } static DecodeStatus DecodeFixedPointScaleImm64(llvm::MCInst &Inst, unsigned Imm, uint64_t Addr, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(64 - Imm)); + Inst.addOperand(MCOperand::createImm(64 - Imm)); return Success; } @@ -609,21 +607,21 @@ static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm, if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 4)) - Inst.addOperand(MCOperand::CreateImm(ImmVal)); + Inst.addOperand(MCOperand::createImm(ImmVal)); return Success; } static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm((Imm >> 1) & 1)); - Inst.addOperand(MCOperand::CreateImm(Imm & 1)); + Inst.addOperand(MCOperand::createImm((Imm >> 1) & 1)); + Inst.addOperand(MCOperand::createImm(Imm & 1)); return Success; } static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(Imm)); + Inst.addOperand(MCOperand::createImm(Imm)); // Every system register in the encoding space is valid with the syntax // S<op0>_<op1>_<Cn>_<Cm>_<op2>, so decoding system registers always succeeds. @@ -633,7 +631,7 @@ static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm, static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(Imm)); + Inst.addOperand(MCOperand::createImm(Imm)); return Success; } @@ -656,20 +654,20 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, } // Add the lane - Inst.addOperand(MCOperand::CreateImm(1)); + Inst.addOperand(MCOperand::createImm(1)); return Success; } static DecodeStatus DecodeVecShiftRImm(llvm::MCInst &Inst, unsigned Imm, unsigned Add) { - Inst.addOperand(MCOperand::CreateImm(Add - Imm)); + Inst.addOperand(MCOperand::createImm(Add - Imm)); return Success; } static DecodeStatus DecodeVecShiftLImm(llvm::MCInst &Inst, unsigned Imm, unsigned Add) { - Inst.addOperand(MCOperand::CreateImm((Imm + Add) & (Add - 1))); + Inst.addOperand(MCOperand::createImm((Imm + Add) & (Add - 1))); return Success; } @@ -789,7 +787,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, break; } - Inst.addOperand(MCOperand::CreateImm(shift)); + Inst.addOperand(MCOperand::createImm(shift)); return Success; } @@ -821,8 +819,8 @@ static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn, Inst.getOpcode() == AArch64::MOVKXi) Inst.addOperand(Inst.getOperand(0)); - Inst.addOperand(MCOperand::CreateImm(imm)); - Inst.addOperand(MCOperand::CreateImm(shift)); + Inst.addOperand(MCOperand::createImm(imm)); + Inst.addOperand(MCOperand::createImm(shift)); return Success; } @@ -840,7 +838,7 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, return Fail; case AArch64::PRFMui: // Rt is an immediate in prefetch. 
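// Aside: plugging numbers into the two vector-shift decode formulas above
// (values illustrative, not taken from a particular instruction). The
// right-shift field is biased so the amount decodes as Add - Imm; the
// left-shift field decodes as (Imm + Add) & (Add - 1), i.e. reduced modulo
// the bias.
#include <cassert>
int main() {
  unsigned Add = 16, Imm = 9;
  assert(Add - Imm == 7);                    // DecodeVecShiftRImm path
  unsigned AddL = 8, ImmL = 11;
  assert(((ImmL + AddL) & (AddL - 1)) == 3); // DecodeVecShiftLImm path
  return 0;
}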
- Inst.addOperand(MCOperand::CreateImm(Rt)); + Inst.addOperand(MCOperand::createImm(Rt)); break; case AArch64::STRBBui: case AArch64::LDRBBui: @@ -883,7 +881,7 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); if (!Dis->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4)) - Inst.addOperand(MCOperand::CreateImm(offset)); + Inst.addOperand(MCOperand::createImm(offset)); return Success; } @@ -958,7 +956,7 @@ static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, return Fail; case AArch64::PRFUMi: // Rt is an immediate in prefetch. - Inst.addOperand(MCOperand::CreateImm(Rt)); + Inst.addOperand(MCOperand::createImm(Rt)); break; case AArch64::STURBBi: case AArch64::LDURBBi: @@ -1059,7 +1057,7 @@ static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, } DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(offset)); + Inst.addOperand(MCOperand::createImm(offset)); bool IsLoad = fieldFromInstruction(insn, 22, 1); bool IsIndexed = fieldFromInstruction(insn, 10, 2) != 0; @@ -1104,6 +1102,12 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, case AArch64::STLRW: case AArch64::STLRB: case AArch64::STLRH: + case AArch64::STLLRW: + case AArch64::STLLRB: + case AArch64::STLLRH: + case AArch64::LDLARW: + case AArch64::LDLARB: + case AArch64::LDLARH: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); break; case AArch64::STLXRX: @@ -1114,6 +1118,8 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, case AArch64::LDAXRX: case AArch64::LDXRX: case AArch64::STLRX: + case AArch64::LDLARX: + case AArch64::STLLRX: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); break; case AArch64::STLXPW: @@ -1262,7 +1268,7 @@ static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, } DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(offset)); + Inst.addOperand(MCOperand::createImm(offset)); // You shouldn't load to the same register twice in an instruction... if (IsLoad && Rt == Rt2) @@ -1329,7 +1335,7 @@ static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, break; } - Inst.addOperand(MCOperand::CreateImm(extend)); + Inst.addOperand(MCOperand::createImm(extend)); return Success; } @@ -1360,7 +1366,7 @@ static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst, if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 32)) return Fail; } - Inst.addOperand(MCOperand::CreateImm(imm)); + Inst.addOperand(MCOperand::createImm(imm)); return Success; } @@ -1377,7 +1383,7 @@ static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, else DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(imm)); + Inst.addOperand(MCOperand::createImm(imm)); switch (Inst.getOpcode()) { default: @@ -1390,13 +1396,13 @@ static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, case AArch64::MOVIv4i32: case AArch64::MVNIv2i32: case AArch64::MVNIv4i32: - Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2)); + Inst.addOperand(MCOperand::createImm((cmode & 6) << 2)); break; case AArch64::MOVIv2s_msl: case AArch64::MOVIv4s_msl: case AArch64::MVNIv2s_msl: case AArch64::MVNIv4s_msl: - Inst.addOperand(MCOperand::CreateImm(cmode & 1 ? 0x110 : 0x108)); + Inst.addOperand(MCOperand::createImm(cmode & 1 ? 
0x110 : 0x108)); break; } @@ -1415,8 +1421,8 @@ static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst, DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(imm)); - Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2)); + Inst.addOperand(MCOperand::createImm(imm)); + Inst.addOperand(MCOperand::createImm((cmode & 6) << 2)); return Success; } @@ -1435,7 +1441,7 @@ static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn, DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); if (!Dis->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4)) - Inst.addOperand(MCOperand::CreateImm(imm)); + Inst.addOperand(MCOperand::createImm(imm)); return Success; } @@ -1471,8 +1477,8 @@ static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn, } if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4)) - Inst.addOperand(MCOperand::CreateImm(ImmVal)); - Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal)); + Inst.addOperand(MCOperand::createImm(ImmVal)); + Inst.addOperand(MCOperand::createImm(12 * ShifterVal)); return Success; } @@ -1488,7 +1494,7 @@ static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn, imm |= ~((1LL << 26) - 1); if (!Dis->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4)) - Inst.addOperand(MCOperand::CreateImm(imm)); + Inst.addOperand(MCOperand::createImm(imm)); return Success; } @@ -1502,11 +1508,14 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst, uint64_t pstate_field = (op1 << 3) | op2; - Inst.addOperand(MCOperand::CreateImm(pstate_field)); - Inst.addOperand(MCOperand::CreateImm(crm)); + Inst.addOperand(MCOperand::createImm(pstate_field)); + Inst.addOperand(MCOperand::createImm(crm)); bool ValidNamed; - (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed); + const AArch64Disassembler *Dis = + static_cast<const AArch64Disassembler *>(Decoder); + (void)AArch64PState::PStateMapper().toString(pstate_field, + Dis->getSubtargetInfo().getFeatureBits(), ValidNamed); return ValidNamed ? 
Success : Fail; } @@ -1528,9 +1537,9 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); else DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(bit)); + Inst.addOperand(MCOperand::createImm(bit)); if (!Dis->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4)) - Inst.addOperand(MCOperand::CreateImm(dst)); + Inst.addOperand(MCOperand::createImm(dst)); return Success; } diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp index 2057c51..07e4a45 100644 --- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp @@ -165,7 +165,7 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( if (SymbolicOp.AddSymbol.Present) { if (SymbolicOp.AddSymbol.Name) { StringRef Name(SymbolicOp.AddSymbol.Name); - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); + MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); if (Variant != MCSymbolRefExpr::VK_None) Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx); @@ -180,7 +180,7 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( if (SymbolicOp.SubtractSymbol.Present) { if (SymbolicOp.SubtractSymbol.Name) { StringRef Name(SymbolicOp.SubtractSymbol.Name); - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); + MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); Sub = MCSymbolRefExpr::Create(Sym, Ctx); } else { Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx); @@ -214,7 +214,7 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( Expr = MCConstantExpr::Create(0, Ctx); } - MI.addOperand(MCOperand::CreateExpr(Expr)); + MI.addOperand(MCOperand::createExpr(Expr)); return true; } diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 46a1d79..02bd929 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -34,18 +34,13 @@ using namespace llvm; AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : AArch64InstPrinter(MAI, MII, MRI, STI) {} + const MCRegisterInfo &MRI) + : AArch64InstPrinter(MAI, MII, MRI) {} void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { // This is for .cfi directives. @@ -53,7 +48,8 @@ void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { // Check for special encodings and print the canonical alias instead. 
unsigned Opcode = MI->getOpcode(); @@ -166,11 +162,23 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, int ImmR = MI->getOperand(3).getImm(); int ImmS = MI->getOperand(4).getImm(); - // BFI alias - if (ImmS < ImmR) { + if ((Op2.getReg() == AArch64::WZR || Op2.getReg() == AArch64::XZR) && + (ImmR == 0 || ImmS < ImmR)) { + // BFC takes precedence over its entire range, slightly differently to BFI. + int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32; + int LSB = (BitWidth - ImmR) % BitWidth; + int Width = ImmS + 1; + + O << "\tbfc\t" << getRegisterName(Op0.getReg()) + << ", #" << LSB << ", #" << Width; + printAnnotation(O, Annot); + return; + } else if (ImmS < ImmR) { + // BFI alias int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32; int LSB = (BitWidth - ImmR) % BitWidth; int Width = ImmS + 1; + O << "\tbfi\t" << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op2.getReg()) << ", #" << LSB << ", #" << Width; printAnnotation(O, Annot); @@ -210,8 +218,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - if (!printAliasInstr(MI, O)) - printInstruction(MI, O); + if (!printAliasInstr(MI, STI, O)) + printInstruction(MI, STI, O); printAnnotation(O, Annot); } @@ -614,7 +622,8 @@ static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { } void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { unsigned Opcode = MI->getOpcode(); StringRef Layout, Mnemonic; @@ -624,7 +633,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, << getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", "; unsigned ListOpNum = IsTbx ? 2 : 1; - printVectorList(MI, ListOpNum, O, ""); + printVectorList(MI, ListOpNum, STI, O, ""); O << ", " << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg); @@ -638,7 +647,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // Now onto the operands: first a vector list with possible lane
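// Aside: the printer-side inverse of the parser rewrite sketched earlier.
// Given "bfm w0, wzr, #29, #3", the zero source register plus ImmS < ImmR
// selects the BFC spelling, and (LSB, Width) are recovered with the same
// arithmetic as the hunk above. Standalone sketch; registers illustrative.
#include <cstdio>
int main() {
  int BitWidth = 32, ImmR = 29, ImmS = 3;  // bfm w0, wzr, #29, #3
  int LSB = (BitWidth - ImmR) % BitWidth;  // 3
  int Width = ImmS + 1;                    // 4
  std::printf("bfc w0, #%d, #%d\n", LSB, Width);
  return 0;
}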
{ v0 }[2] int OpNum = LdStDesc->ListOperand; - printVectorList(MI, OpNum++, O, ""); + printVectorList(MI, OpNum++, STI, O, ""); if (LdStDesc->HasLane) O << '[' << MI->getOperand(OpNum++).getImm() << ']'; @@ -662,7 +671,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - AArch64InstPrinter::printInst(MI, O, Annot); + AArch64InstPrinter::printInst(MI, O, Annot, STI); } bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { @@ -889,6 +898,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { } void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { @@ -903,6 +913,7 @@ void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); O << format("#%#llx", Op.getImm()); @@ -922,6 +933,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); assert(Op.isReg() && "Non-register vreg operand!"); @@ -930,6 +942,7 @@ void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); assert(Op.isImm() && "System instruction C[nm] operands must be immediates!"); @@ -937,6 +950,7 @@ void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); if (MO.isImm()) { @@ -946,18 +960,19 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm()); O << '#' << Val; if (Shift != 0) - printShifter(MI, OpNum + 1, O); + printShifter(MI, OpNum + 1, STI, O); if (CommentStream) *CommentStream << '=' << (Val << Shift) << '\n'; } else { assert(MO.isExpr() && "Unexpected operand type!"); O << *MO.getExpr(); - printShifter(MI, OpNum + 1, O); + printShifter(MI, OpNum + 1, STI, O); } } void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { uint64_t Val = MI->getOperand(OpNum).getImm(); O << "#0x"; @@ -965,6 +980,7 @@ void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { uint64_t Val = MI->getOperand(OpNum).getImm(); O << "#0x"; @@ -972,6 +988,7 @@ void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNum).getImm(); // LSL #0 should not be printed. 
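// Aside: the arithmetic behind the printAddSubImm comment stream above. An
// add/sub immediate is a 12-bit value optionally shifted left by 12, so the
// "=..." annotation is Val << Shift. Standalone illustration:
#include <cassert>
int main() {
  unsigned Val = 1, Shift = 12;       // e.g. "add x0, x0, #1, lsl #12"
  assert((Val << Shift) == 4096u);    // annotation would read "=4096"
  unsigned Val2 = 0x123, Shift2 = 0;
  assert((Val2 << Shift2) == 0x123u); // "lsl #0" itself is never printed
  return 0;
}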
@@ -983,18 +1000,21 @@ void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << getRegisterName(MI->getOperand(OpNum).getReg()); - printShifter(MI, OpNum + 1, O); + printShifter(MI, OpNum + 1, STI, O); } void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << getRegisterName(MI->getOperand(OpNum).getReg()); - printArithExtend(MI, OpNum + 1, O); + printArithExtend(MI, OpNum + 1, STI, O); } void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNum).getImm(); AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val); @@ -1038,24 +1058,28 @@ void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm(); O << AArch64CC::getCondCodeName(CC); } void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm(); O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC)); } void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']'; } template<int Scale> void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << '#' << Scale * MI->getOperand(OpNum).getImm(); } @@ -1085,10 +1109,12 @@ void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned prfop = MI->getOperand(OpNum).getImm(); bool Valid; - StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, Valid); + StringRef Name = + AArch64PRFM::PRFMMapper().toString(prfop, STI.getFeatureBits(), Valid); if (Valid) O << Name; else @@ -1096,6 +1122,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); float FPImm = @@ -1151,6 +1178,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { } void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O, StringRef LayoutSuffix) { unsigned Reg = MI->getOperand(OpNum).getReg(); @@ -1193,14 +1221,17 @@ void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, O << " }"; } -void AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - printVectorList(MI, OpNum, O, ""); +void +AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, + unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + printVectorList(MI, OpNum, STI, O, ""); } template <unsigned NumLanes, char LaneKind> void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { std::string 
Suffix("."); if (NumLanes) @@ -1208,15 +1239,17 @@ void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, else Suffix += LaneKind; - printVectorList(MI, OpNum, O, Suffix); + printVectorList(MI, OpNum, STI, O, Suffix); } void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "[" << MI->getOperand(OpNum).getImm() << "]"; } void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); @@ -1241,6 +1274,7 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); @@ -1256,6 +1290,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); unsigned Opcode = MI->getOpcode(); @@ -1263,9 +1298,11 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, bool Valid; StringRef Name; if (Opcode == AArch64::ISB) - Name = AArch64ISB::ISBMapper().toString(Val, Valid); + Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(), + Valid); else - Name = AArch64DB::DBarrierMapper().toString(Val, Valid); + Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(), + Valid); if (Valid) O << Name; else @@ -1273,31 +1310,35 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); - auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val); + auto Mapper = AArch64SysReg::MRSMapper(); + std::string Name = Mapper.toString(Val, STI.getFeatureBits()); O << StringRef(Name).upper(); } void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); - auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val); + auto Mapper = AArch64SysReg::MSRMapper(); + std::string Name = Mapper.toString(Val, STI.getFeatureBits()); O << StringRef(Name).upper(); } void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); bool Valid; - StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid); + StringRef Name = + AArch64PState::PStateMapper().toString(Val, STI.getFeatureBits(), Valid); if (Valid) O << StringRef(Name.str()).upper(); else @@ -1305,6 +1346,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned RawVal = MI->getOperand(OpNo).getImm(); uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal); diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 5f51621..c2077a0 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ 
b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -26,16 +26,21 @@ class MCOperand; class AArch64InstPrinter : public MCInstPrinter { public: AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; void printRegName(raw_ostream &OS, unsigned RegNo) const override; // Autogenerated by tblgen. - virtual void printInstruction(const MCInst *MI, raw_ostream &O); - virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O); + virtual void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, - unsigned PrintMethodIdx, raw_ostream &O); + unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, + raw_ostream &O); virtual StringRef getRegName(unsigned RegNo) const { return getRegisterName(RegNo); } @@ -45,90 +50,126 @@ public: protected: bool printSysAlias(const MCInst *MI, raw_ostream &O); // Operand printers - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printHexImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + void printHexImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm, raw_ostream &O); - template<int Amount> - void printPostIncOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + template <int Amount> + void printPostIncOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { printPostIncOperand(MI, OpNo, Amount, O); } - void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printArithExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVRegOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSysCROperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddSubImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printLogicalImm32(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printLogicalImm64(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printShifter(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printShiftedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printExtendedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void 
printArithExtend(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O, char SrcRegKind, unsigned Width); template <char SrcRegKind, unsigned Width> - void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + void printMemExtend(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { printMemExtend(MI, OpNum, O, SrcRegKind, Width); } - void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printInverseCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAlignedLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printInverseCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAlignedLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale, raw_ostream &O); void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale, raw_ostream &O); - template<int Scale> - void printUImm12Offset(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + template <int Scale> + void printUImm12Offset(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { printUImm12Offset(MI, OpNum, Scale, O); } - template<int BitWidth> - void printAMIndexedWB(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + template <int BitWidth> + void printAMIndexedWB(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { printAMIndexedWB(MI, OpNum, BitWidth / 8, O); } - void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAMNoIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - template<int Scale> - void printImmScale(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template <int Scale> + void printImmScale(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printPrefetchOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O, + void printVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O, StringRef LayoutSuffix); /// Print a list of vector registers where the type suffix is implicit /// (i.e. attached to the instruction rather than the registers). 
void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); template <unsigned NumLanes, char LaneKind> - void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSystemPStateField(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printTypedVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + + void printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAdrpLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printBarrierOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSystemPStateField(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); }; class AArch64AppleInstPrinter : public AArch64InstPrinter { public: AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; - void printInstruction(const MCInst *MI, raw_ostream &O) override; - bool printAliasInstr(const MCInst *MI, raw_ostream &O) override; + void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O) override; + bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O) override; void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, raw_ostream &O) override; StringRef getRegName(unsigned RegNo) const override { return getRegisterName(RegNo); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h index 4db9dea..ed24343 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h @@ -237,15 +237,15 @@ static inline bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, if (isShiftedMask_64(Imm)) { I = countTrailingZeros(Imm); assert(I < 64 && "undefined behavior"); - CTO = CountTrailingOnes_64(Imm >> I); + CTO = countTrailingOnes(Imm >> I); } else { Imm |= ~Mask; if (!isShiftedMask_64(~Imm)) return false; - unsigned CLO = CountLeadingOnes_64(Imm); + unsigned CLO = countLeadingOnes(Imm); I = 64 - CLO; - CTO = CLO + CountTrailingOnes_64(Imm) - (64 - Size); + CTO = CLO + countTrailingOnes(Imm) - (64 - Size); } // Encode in Immr the number of RORs it would take to get *from* 0^m 1^n diff --git 
a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 27cbac9..31fceb6 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -18,6 +18,7 @@
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MachO.h"
 using namespace llvm;
@@ -246,10 +247,7 @@ bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
   // If the count is not 4-byte aligned, we must be writing data into the text
   // section (otherwise we have unaligned instructions, and thus have far
   // bigger problems), so just write zeros instead.
-  if ((Count & 3) != 0) {
-    for (uint64_t i = 0, e = (Count & 3); i != e; ++i)
-      OW->Write8(0);
-  }
+  OW->WriteZeros(Count % 4);
 
   // We are properly aligned, so write NOPs as requested.
   Count /= 4;
@@ -312,47 +310,11 @@ public:
   DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI)
       : AArch64AsmBackend(T), MRI(MRI) {}
 
-  MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
     return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64,
                                          MachO::CPU_SUBTYPE_ARM64_ALL);
   }
 
-  bool doesSectionRequireSymbols(const MCSection &Section) const override {
-    // Any section for which the linker breaks things into atoms needs to
-    // preserve symbols, including assembler local symbols, to identify
-    // those atoms. These sections are:
-    // Sections of type:
-    //
-    //    S_CSTRING_LITERALS  (e.g. __cstring)
-    //    S_LITERAL_POINTERS  (e.g. objc selector pointers)
-    //    S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
-    //
-    // Sections named:
-    //
-    //    __TEXT,__eh_frame
-    //    __TEXT,__ustring
-    //    __DATA,__cfstring
-    //    __DATA,__objc_classrefs
-    //    __DATA,__objc_catlist
-    //
-    // FIXME: It would be better if the compiler used actual linker local
-    // symbols for each of these sections rather than preserving what
-    // are ostensibly assembler local symbols.
-    const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
-    return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
-            SMO.getType() == MachO::S_4BYTE_LITERALS ||
-            SMO.getType() == MachO::S_8BYTE_LITERALS ||
-            SMO.getType() == MachO::S_16BYTE_LITERALS ||
-            SMO.getType() == MachO::S_LITERAL_POINTERS ||
-            (SMO.getSegmentName() == "__TEXT" &&
-             (SMO.getSectionName() == "__eh_frame" ||
-              SMO.getSectionName() == "__ustring")) ||
-            (SMO.getSegmentName() == "__DATA" &&
-             (SMO.getSectionName() == "__cfstring" ||
-              SMO.getSectionName() == "__objc_classrefs" ||
-              SMO.getSectionName() == "__objc_catlist")));
-  }
-
   /// \brief Generate the compact unwind encoding from the CFI directives.
   uint32_t generateCompactUnwindEncoding(
                              ArrayRef<MCCFIInstruction> Instrs) const override {
@@ -496,7 +458,7 @@ public:
   ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian)
       : AArch64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {}
 
-  MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
     return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian);
   }
 
@@ -529,14 +491,28 @@ void ELFAArch64AsmBackend::processFixupValue(
     IsResolved = false;
 }
 
+// Returns whether this fixup is based on an address in the .eh_frame section,
+// and therefore should be byte swapped.
+// FIXME: Should be replaced with something more principled.
+static bool isByteSwappedFixup(const MCExpr *E) {
+  MCValue Val;
+  if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr))
+    return false;
+
+  if (!Val.getSymA() || Val.getSymA()->getSymbol().isUndefined())
+    return false;
+
+  const MCSectionELF *SecELF =
+      dyn_cast<MCSectionELF>(&Val.getSymA()->getSymbol().getSection());
+  return SecELF->getSectionName() == ".eh_frame";
+}
+
 void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
                                       unsigned DataSize, uint64_t Value,
                                       bool IsPCRel) const {
   // store fixups in .eh_frame section in big endian order
   if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) {
-    const MCSection *Sec = Fixup.getValue()->FindAssociatedSection();
-    const MCSectionELF *SecELF = dyn_cast_or_null<const MCSectionELF>(Sec);
-    if (SecELF && SecELF->getSectionName() == ".eh_frame")
+    if (isByteSwappedFixup(Fixup.getValue()))
       Value = ByteSwap_32(unsigned(Value));
   }
   AArch64AsmBackend::applyFixup(Fixup, Data, DataSize, Value, IsPCRel);
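A note on the .eh_frame special case above: on big-endian AArch64, 4-byte data fixups that resolve into .eh_frame are stored byte-swapped, which is what isByteSwappedFixup() now decides from the relocated symbol's section. A self-contained sketch of just that swap decision, with plain flags standing in for the MC types:

    #include <cstdint>

    // Swap the byte order of a 32-bit value.
    static uint32_t byteSwap32(uint32_t V) {
      return (V >> 24) | ((V >> 8) & 0xff00u) | ((V << 8) & 0xff0000u) | (V << 24);
    }

    // Mirror of the FK_Data_4 / .eh_frame special case in applyFixup() above:
    // only big-endian targets writing a 4-byte fixup into .eh_frame swap.
    static uint64_t fixupValueToWrite(uint64_t Value, bool TargetIsLittleEndian,
                                      bool FixupIsData4, bool SymbolInEhFrame) {
      if (!TargetIsLittleEndian && FixupIsData4 && SymbolInEhFrame)
        return byteSwap32(static_cast<uint32_t>(Value));
      return Value;
    }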
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 5ea49c3..1f516d1 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -26,7 +26,7 @@ class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
 public:
   AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian);
 
-  virtual ~AArch64ELFObjectWriter();
+  ~AArch64ELFObjectWriter() override;
 
 protected:
   unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
@@ -248,9 +248,9 @@
   llvm_unreachable("Unimplemented fixup -> relocation");
 }
 
-MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS,
-                                                   uint8_t OSABI,
-                                                   bool IsLittleEndian) {
+MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
+                                                   uint8_t OSABI,
+                                                   bool IsLittleEndian) {
   MCELFObjectTargetWriter *MOTW =
       new AArch64ELFObjectWriter(OSABI, IsLittleEndian);
   return createELFObjectWriter(MOTW, OS, IsLittleEndian);
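The writeNopData() change in the asm backend above folds a hand-rolled zero loop into a single WriteZeros(Count % 4) call, then emits NOPs for the rest. A standalone sketch of the same padding arithmetic, outside the MC layer; the byte-vector output and the hard-coded A64 NOP encoding 0xd503201f are illustrative:

    #include <cstdint>
    #include <vector>

    // Fill Count bytes of instruction space: zero bytes up to a 4-byte
    // boundary, then A64 NOPs (0xd503201f), stored little-endian here.
    static void writeNops(uint64_t Count, std::vector<uint8_t> &Out) {
      for (uint64_t I = 0, E = Count % 4; I != E; ++I)
        Out.push_back(0);                       // the WriteZeros(Count % 4) step
      for (uint64_t I = 0, E = Count / 4; I != E; ++I)
        for (unsigned B = 0; B != 4; ++B)
          Out.push_back(uint8_t(0xd503201fu >> (8 * B)));
    }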
"\t.inst\t0x" << utohexstr(Inst) << "\n"; + OS << "\t.inst\t0x" << Twine::utohexstr(Inst) << "\n"; } class AArch64TargetELFStreamer : public AArch64TargetStreamer { @@ -89,15 +90,12 @@ class AArch64ELFStreamer : public MCELFStreamer { public: friend class AArch64TargetELFStreamer; - AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter) + AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter) : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), LastEMS(EMS_None) {} - ~AArch64ELFStreamer() {} - - void ChangeSection(const MCSection *Section, - const MCExpr *Subsection) override { + void ChangeSection(MCSection *Section, const MCExpr *Subsection) override { // We have to keep track of the mapping symbol state of any sections we // use. Each one should start off as EMS_None, which is provided as the // default constructor by DenseMap::lookup. @@ -117,15 +115,8 @@ public: } void emitInst(uint32_t Inst) { - char Buffer[4]; - const bool LittleEndian = getContext().getAsmInfo()->isLittleEndian(); - EmitA64MappingSymbol(); - for (unsigned II = 0; II != 4; ++II) { - const unsigned I = LittleEndian ? (4 - II - 1) : II; - Buffer[4 - II - 1] = uint8_t(Inst >> I * CHAR_BIT); - } - MCELFStreamer::EmitBytes(StringRef(Buffer, 4)); + MCELFStreamer::EmitIntValue(Inst, 4); } /// This is one of the functions used to emit data into an ELF section, so the @@ -167,10 +158,10 @@ private: } void EmitMappingSymbol(StringRef Name) { - MCSymbol *Start = getContext().CreateTempSymbol(); + MCSymbol *Start = getContext().createTempSymbol(); EmitLabel(Start); - MCSymbol *Symbol = getContext().GetOrCreateSymbol( + MCSymbol *Symbol = getContext().getOrCreateSymbol( Name + "." 
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
index 71b05cc..ef48203 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -19,8 +19,8 @@ namespace llvm {
 
 MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
-                                        raw_ostream &OS, MCCodeEmitter *Emitter,
-                                        bool RelaxAll);
+                                        raw_pwrite_stream &OS,
+                                        MCCodeEmitter *Emitter, bool RelaxAll);
 }
 
 #endif
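Both emitInst() paths above agree on the textual form of the pseudo: a fixed 32-bit word printed as ".inst 0x<hex>". A freestanding rendering of that directive; snprintf formatting stands in for the Twine-based printing, and the exact hex case is a detail of that printing:

    #include <cstdint>
    #include <cstdio>
    #include <string>

    // Render the .inst directive the asm streamer would print for one
    // A64 instruction word.
    static std::string renderInstDirective(uint32_t Inst) {
      char Buf[32];
      std::snprintf(Buf, sizeof(Buf), "\t.inst\t0x%x", (unsigned)Inst);
      return Buf;
    }

    // e.g. renderInstDirective(0xd503201f) yields "\t.inst\t0xd503201f"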
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index f048474..ab2cad6 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -48,6 +48,10 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
   UseDataRegionDirectives = true;
 
   ExceptionsType = ExceptionHandling::DwarfCFI;
+
+  // AArch64 Darwin doesn't have the baggage of X86/ARM, so it's fine to use
+  // LShr instead of AShr.
+  UseLogicalShr = true;
 }
 
 const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
@@ -59,7 +63,7 @@ const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
   MCContext &Context = Streamer.getContext();
   const MCExpr *Res =
       MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context);
-  MCSymbol *PCSym = Context.CreateTempSymbol();
+  MCSymbol *PCSym = Context.createTempSymbol();
   Streamer.EmitLabel(PCSym);
   const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
   return MCBinaryExpr::CreateSub(Res, PC, Context);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 5d03c21..9b88de7 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
 
 #include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
 
 namespace llvm {
 class Target;
@@ -27,7 +28,7 @@ struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin {
                     MCStreamer &Streamer) const override;
 };
 
-struct AArch64MCAsmInfoELF : public MCAsmInfo {
+struct AArch64MCAsmInfoELF : public MCAsmInfoELF {
   explicit AArch64MCAsmInfoELF(StringRef TT);
 };
 
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 4756a192..277ea9f 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -38,11 +38,9 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
   AArch64MCCodeEmitter(const AArch64MCCodeEmitter &);   // DO NOT IMPLEMENT
   void operator=(const AArch64MCCodeEmitter &);         // DO NOT IMPLEMENT
 public:
-  AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
-                       MCContext &ctx)
-      : Ctx(ctx) {}
+  AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {}
 
-  ~AArch64MCCodeEmitter() {}
+  ~AArch64MCCodeEmitter() override {}
 
   // getBinaryCodeForInstr - TableGen'erated function for getting the
   // binary encoding for an instruction.
@@ -186,7 +184,7 @@ public:
     }
   }
 
-  void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+  void encodeInstruction(const MCInst &MI, raw_ostream &OS,
                          SmallVectorImpl<MCFixup> &Fixups,
                          const MCSubtargetInfo &STI) const override;
 
@@ -205,9 +203,8 @@ public:
 
 MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
                                                 const MCRegisterInfo &MRI,
-                                                const MCSubtargetInfo &STI,
                                                 MCContext &Ctx) {
-  return new AArch64MCCodeEmitter(MCII, STI, Ctx);
+  return new AArch64MCCodeEmitter(MCII, Ctx);
 }
 
 /// getMachineOpValue - Return binary encoding of operand. If the machine
@@ -235,7 +232,7 @@ AArch64MCCodeEmitter::getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx,
   else {
     assert(MO.isExpr() && "unable to encode load/store imm operand");
     MCFixupKind Kind = MCFixupKind(FixupKind);
-    Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+    Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
     ++MCNumFixups;
   }
 
@@ -259,7 +256,7 @@ AArch64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
 
   MCFixupKind Kind = MI.getOpcode() == AArch64::ADR ?
MCFixupKind(AArch64::fixup_aarch64_pcrel_adr_imm21) : MCFixupKind(AArch64::fixup_aarch64_pcrel_adrp_imm21); - Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); + Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc())); MCNumFixups += 1; @@ -289,7 +286,7 @@ AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, // Encode the 12 bits of the fixup. MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_add_imm12); - Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); + Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc())); ++MCNumFixups; @@ -309,7 +306,7 @@ uint32_t AArch64MCCodeEmitter::getCondBranchTargetOpValue( assert(MO.isExpr() && "Unexpected target type!"); MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch19); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); + Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc())); ++MCNumFixups; @@ -331,7 +328,7 @@ AArch64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected target type!"); MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_ldr_pcrel_imm19); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); + Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc())); ++MCNumFixups; @@ -358,7 +355,7 @@ AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, return MO.getImm(); assert(MO.isExpr() && "Unexpected movz/movk immediate"); - Fixups.push_back(MCFixup::Create( + Fixups.push_back(MCFixup::create( 0, MO.getExpr(), MCFixupKind(AArch64::fixup_aarch64_movw), MI.getLoc())); ++MCNumFixups; @@ -379,7 +376,7 @@ uint32_t AArch64MCCodeEmitter::getTestBranchTargetOpValue( assert(MO.isExpr() && "Unexpected ADR target type!"); MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch14); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); + Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc())); ++MCNumFixups; @@ -403,7 +400,7 @@ AArch64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, MCFixupKind Kind = MI.getOpcode() == AArch64::BL ? MCFixupKind(AArch64::fixup_aarch64_pcrel_call26) : MCFixupKind(AArch64::fixup_aarch64_pcrel_branch26); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); + Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc())); ++MCNumFixups; @@ -601,7 +598,7 @@ unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, return EncodedValue & ~(1u << 30); } -void AArch64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, +void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { if (MI.getOpcode() == AArch64::TLSDESCCALL) { @@ -609,7 +606,7 @@ void AArch64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, // following (BLR) instruction. It doesn't emit any code itself so it // doesn't go through the normal TableGenerated channels. 
MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call); - Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup)); + Fixups.push_back(MCFixup::create(0, MI.getOperand(0).getExpr(), Fixup)); return; } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index e396df8..74b81af 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/ELF.h" @@ -85,7 +86,7 @@ void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const { Streamer.visitUsedExpr(*getSubExpr()); } -const MCSection *AArch64MCExpr::FindAssociatedSection() const { +MCSection *AArch64MCExpr::FindAssociatedSection() const { llvm_unreachable("FIXME: what goes here?"); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index db48ac9..95d2277 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -120,7 +120,7 @@ public: /// @{ /// Get the kind of this expression. - VariantKind getKind() const { return static_cast<VariantKind>(Kind); } + VariantKind getKind() const { return Kind; } /// Get the expression this modifier applies to. const MCExpr *getSubExpr() const { return Expr; } @@ -149,7 +149,7 @@ public: void visitUsedExpr(MCStreamer &Streamer) const override; - const MCSection *FindAssociatedSection() const override; + MCSection *FindAssociatedSection() const override; bool EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 0f7a6b8..2e22de0 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -105,112 +105,78 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, RM = Reloc::Static; MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM, OL); + X->initMCCodeGenInfo(RM, CM, OL); return X; } -static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, +static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new AArch64InstPrinter(MAI, MII, MRI, STI); + return new AArch64InstPrinter(MAI, MII, MRI); if (SyntaxVariant == 1) - return new AArch64AppleInstPrinter(MAI, MII, MRI, STI); + return new AArch64AppleInstPrinter(MAI, MII, MRI); return nullptr; } -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, - /*LabelSections*/ true); - +static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, + MCAsmBackend &TAB, raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll); } +static MCStreamer *createMachOStreamer(MCContext &Ctx, 
+                                       MCAsmBackend &TAB,
+                                       raw_pwrite_stream &OS,
+                                       MCCodeEmitter *Emitter, bool RelaxAll,
+                                       bool DWARFMustBeAtTheEnd) {
+  return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
+                             DWARFMustBeAtTheEnd,
+                             /*LabelSections*/ true);
+}
+
 // Force static initialization.
 extern "C" void LLVMInitializeAArch64TargetMC() {
-  // Register the MC asm info.
-  RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo);
-  RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo);
-  RegisterMCAsmInfoFn Z(TheARM64Target, createAArch64MCAsmInfo);
-
-  // Register the MC codegen info.
-  TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget,
-                                        createAArch64MCCodeGenInfo);
-  TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget,
-                                        createAArch64MCCodeGenInfo);
-  TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target,
-                                        createAArch64MCCodeGenInfo);
-
-  // Register the MC instruction info.
-  TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget,
-                                      createAArch64MCInstrInfo);
-  TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget,
-                                      createAArch64MCInstrInfo);
-  TargetRegistry::RegisterMCInstrInfo(TheARM64Target,
-                                      createAArch64MCInstrInfo);
-
-  // Register the MC register info.
-  TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget,
-                                    createAArch64MCRegisterInfo);
-  TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget,
-                                    createAArch64MCRegisterInfo);
-  TargetRegistry::RegisterMCRegInfo(TheARM64Target,
-                                    createAArch64MCRegisterInfo);
-
-  // Register the MC subtarget info.
-  TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget,
-                                          createAArch64MCSubtargetInfo);
-  TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget,
-                                          createAArch64MCSubtargetInfo);
-  TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target,
-                                          createAArch64MCSubtargetInfo);
+  for (Target *T :
+       {&TheAArch64leTarget, &TheAArch64beTarget, &TheARM64Target}) {
+    // Register the MC asm info.
+    RegisterMCAsmInfoFn X(*T, createAArch64MCAsmInfo);
+
+    // Register the MC codegen info.
+    TargetRegistry::RegisterMCCodeGenInfo(*T, createAArch64MCCodeGenInfo);
+
+    // Register the MC instruction info.
+    TargetRegistry::RegisterMCInstrInfo(*T, createAArch64MCInstrInfo);
+
+    // Register the MC register info.
+    TargetRegistry::RegisterMCRegInfo(*T, createAArch64MCRegisterInfo);
+
+    // Register the MC subtarget info.
+    TargetRegistry::RegisterMCSubtargetInfo(*T, createAArch64MCSubtargetInfo);
+
+    // Register the MC Code Emitter
+    TargetRegistry::RegisterMCCodeEmitter(*T, createAArch64MCCodeEmitter);
+
+    // Register the obj streamers.
+    TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
+    TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer);
+
+    // Register the obj target streamer.
+    TargetRegistry::RegisterObjectTargetStreamer(
+        *T, createAArch64ObjectTargetStreamer);
+
+    // Register the asm streamer.
+    TargetRegistry::RegisterAsmTargetStreamer(*T,
+                                              createAArch64AsmTargetStreamer);
+    // Register the MCInstPrinter.
+    TargetRegistry::RegisterMCInstPrinter(*T, createAArch64MCInstPrinter);
+  }
 
   // Register the asm backend.
-  TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget,
-                                       createAArch64leAsmBackend);
+  for (Target *T : {&TheAArch64leTarget, &TheARM64Target})
+    TargetRegistry::RegisterMCAsmBackend(*T, createAArch64leAsmBackend);
 
   TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget,
                                        createAArch64beAsmBackend);
-  TargetRegistry::RegisterMCAsmBackend(TheARM64Target,
-                                       createAArch64leAsmBackend);
-
-  // Register the MC Code Emitter
-  TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget,
-                                        createAArch64MCCodeEmitter);
-  TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget,
-                                        createAArch64MCCodeEmitter);
-  TargetRegistry::RegisterMCCodeEmitter(TheARM64Target,
-                                        createAArch64MCCodeEmitter);
-
-  // Register the object streamer.
-  TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget,
-                                           createMCStreamer);
-  TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget,
-                                           createMCStreamer);
-  TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer);
-
-  // Register the asm streamer.
-  TargetRegistry::RegisterAsmStreamer(TheAArch64leTarget,
-                                      createAArch64MCAsmStreamer);
-  TargetRegistry::RegisterAsmStreamer(TheAArch64beTarget,
-                                      createAArch64MCAsmStreamer);
-  TargetRegistry::RegisterAsmStreamer(TheARM64Target,
-                                      createAArch64MCAsmStreamer);
-
-  // Register the MCInstPrinter.
-  TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget,
-                                        createAArch64MCInstPrinter);
-  TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget,
-                                        createAArch64MCInstPrinter);
-  TargetRegistry::RegisterMCInstPrinter(TheARM64Target,
-                                        createAArch64MCInstPrinter);
 }
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 1553115..4705bdf 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -28,18 +28,20 @@ class MCRegisterInfo;
 class MCObjectWriter;
 class MCStreamer;
 class MCSubtargetInfo;
+class MCTargetStreamer;
 class StringRef;
 class Target;
+class Triple;
 class raw_ostream;
+class raw_pwrite_stream;
 
 extern Target TheAArch64leTarget;
 extern Target TheAArch64beTarget;
 extern Target TheARM64Target;
 
 MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
-                                          const MCRegisterInfo &MRI,
-                                          const MCSubtargetInfo &STI,
-                                          MCContext &Ctx);
+                                          const MCRegisterInfo &MRI,
+                                          MCContext &Ctx);
 MCAsmBackend *createAArch64leAsmBackend(const Target &T,
                                         const MCRegisterInfo &MRI,
                                         StringRef TT, StringRef CPU);
@@ -47,17 +49,22 @@ MCAsmBackend *createAArch64beAsmBackend(const Target &T,
                                         const MCRegisterInfo &MRI,
                                         StringRef TT, StringRef CPU);
 
-MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
+MCObjectWriter *createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
+                                             uint8_t OSABI,
                                              bool IsLittleEndian);
 
-MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
-                                              uint32_t CPUSubtype);
+MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS,
+                                              uint32_t CPUType,
+                                              uint32_t CPUSubtype);
+
+MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
+                                                 formatted_raw_ostream &OS,
+                                                 MCInstPrinter *InstPrint,
+                                                 bool isVerboseAsm);
+
+MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S,
+                                                    const MCSubtargetInfo &STI);
 
-MCStreamer *
-createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
-                           bool isVerboseAsm, bool useDwarfDirectory,
-                           MCInstPrinter *InstPrint, MCCodeEmitter *CE,
-                           MCAsmBackend *TAB, bool ShowInst);
 } // End llvm namespace
 
 // Defines symbolic names for AArch64 registers.
This defines a mapping from diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index e12a24b..d425975 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -10,6 +10,7 @@ #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -33,7 +34,7 @@ public: : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/true) {} - void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, + void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; @@ -91,7 +92,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( // This encompasses the relocation for the whole 21-bit value. switch (Sym->getKind()) { default: - Asm.getContext().FatalError(Fixup.getLoc(), + Asm.getContext().reportFatalError(Fixup.getLoc(), "ADR/ADRP relocations must be GOT relative"); case MCSymbolRefExpr::VK_PAGE: RelocType = unsigned(MachO::ARM64_RELOC_PAGE21); @@ -112,8 +113,35 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( } } +static bool canUseLocalRelocation(const MCSectionMachO &Section, + const MCSymbol &Symbol, unsigned Log2Size) { + // Debug info sections can use local relocations. + if (Section.hasAttribute(MachO::S_ATTR_DEBUG)) + return true; + + // Otherwise, only pointer sized relocations are supported. + if (Log2Size != 3) + return false; + + // But only if they don't point to a few forbidden sections. + if (!Symbol.isInSection()) + return true; + const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection()); + if (RefSec.getType() == MachO::S_CSTRING_LITERALS) + return false; + + if (RefSec.getSegmentName() == "__DATA" && + RefSec.getSectionName() == "__objc_classrefs") + return false; + + // FIXME: ld64 currently handles internal pointer-sized relocations + // incorrectly (applying the addend twice). We should be able to return true + // unconditionally by this point when that's fixed. + return false; +} + void AArch64MachObjectWriter::RecordRelocation( - MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, + MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); @@ -123,9 +151,9 @@ void AArch64MachObjectWriter::RecordRelocation( unsigned Log2Size = 0; int64_t Value = 0; unsigned Index = 0; - unsigned IsExtern = 0; unsigned Type = 0; unsigned Kind = Fixup.getKind(); + const MCSymbol *RelSymbol = nullptr; FixupOffset += Fixup.getOffset(); @@ -143,7 +171,7 @@ void AArch64MachObjectWriter::RecordRelocation( // assembler local symbols. If we got here, that's not what we have, // so complain loudly. if (Kind == AArch64::fixup_aarch64_pcrel_branch19) { - Asm.getContext().FatalError(Fixup.getLoc(), + Asm.getContext().reportFatalError(Fixup.getLoc(), "conditional branch requires assembler-local" " label. 
'" + Target.getSymA()->getSymbol().getName() + @@ -154,14 +182,14 @@ void AArch64MachObjectWriter::RecordRelocation( // 14-bit branch relocations should only target internal labels, and so // should never get here. if (Kind == AArch64::fixup_aarch64_pcrel_branch14) { - Asm.getContext().FatalError(Fixup.getLoc(), + Asm.getContext().reportFatalError(Fixup.getLoc(), "Invalid relocation on conditional branch!"); return; } if (!getAArch64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size, Asm)) { - Asm.getContext().FatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!"); + Asm.getContext().reportFatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!"); return; } @@ -171,11 +199,9 @@ void AArch64MachObjectWriter::RecordRelocation( // FIXME: Should this always be extern? // SymbolNum of 0 indicates the absolute section. Type = MachO::ARM64_RELOC_UNSIGNED; - Index = 0; if (IsPCRel) { - IsExtern = 1; - Asm.getContext().FatalError(Fixup.getLoc(), + Asm.getContext().reportFatalError(Fixup.getLoc(), "PC relative absolute relocation!"); // FIXME: x86_64 sets the type to a branch reloc here. Should we do @@ -184,39 +210,36 @@ void AArch64MachObjectWriter::RecordRelocation( } else if (Target.getSymB()) { // A - B + constant const MCSymbol *A = &Target.getSymA()->getSymbol(); const MCSymbolData &A_SD = Asm.getSymbolData(*A); - const MCSymbolData *A_Base = Asm.getAtom(&A_SD); + const MCSymbol *A_Base = Asm.getAtom(*A); const MCSymbol *B = &Target.getSymB()->getSymbol(); const MCSymbolData &B_SD = Asm.getSymbolData(*B); - const MCSymbolData *B_Base = Asm.getAtom(&B_SD); + const MCSymbol *B_Base = Asm.getAtom(*B); // Check for "_foo@got - .", which comes through here as: // Ltmp0: // ... _foo@got - Ltmp0 if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOT && Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None && - Layout.getSymbolOffset(&B_SD) == + Layout.getSymbolOffset(*B) == Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) { // SymB is the PC, so use a PC-rel pointer-to-GOT relocation. - Index = A_Base->getIndex(); - IsExtern = 1; Type = MachO::ARM64_RELOC_POINTER_TO_GOT; IsPCRel = 1; MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); + MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28); + Writer->addRelocation(A_Base, Fragment->getParent(), MRE); return; } else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) // Otherwise, neither symbol can be modified. - Asm.getContext().FatalError(Fixup.getLoc(), + Asm.getContext().reportFatalError(Fixup.getLoc(), "unsupported relocation of modified symbol"); // We don't support PCrel relocations of differences. if (IsPCRel) - Asm.getContext().FatalError(Fixup.getLoc(), + Asm.getContext().reportFatalError(Fixup.getLoc(), "unsupported pc-relative relocation of " "difference"); @@ -227,50 +250,52 @@ void AArch64MachObjectWriter::RecordRelocation( // FIXME: We should probably just synthesize an external symbol and use // that. if (!A_Base) - Asm.getContext().FatalError( + Asm.getContext().reportFatalError( Fixup.getLoc(), "unsupported relocation of local symbol '" + A->getName() + "'. Must have non-local symbol earlier in section."); if (!B_Base) - Asm.getContext().FatalError( + Asm.getContext().reportFatalError( Fixup.getLoc(), "unsupported relocation of local symbol '" + B->getName() + "'. 
Must have non-local symbol earlier in section."); if (A_Base == B_Base && A_Base) - Asm.getContext().FatalError(Fixup.getLoc(), + Asm.getContext().reportFatalError(Fixup.getLoc(), "unsupported relocation with identical base"); - Value += (!A_SD.getFragment() ? 0 - : Writer->getSymbolAddress(&A_SD, Layout)) - - (!A_Base || !A_Base->getFragment() + Value += (!A_SD.getFragment() ? 0 : Writer->getSymbolAddress(*A, Layout)) - + (!A_Base || !A_Base->getData().getFragment() ? 0 - : Writer->getSymbolAddress(A_Base, Layout)); - Value -= (!B_SD.getFragment() ? 0 - : Writer->getSymbolAddress(&B_SD, Layout)) - - (!B_Base || !B_Base->getFragment() + : Writer->getSymbolAddress(*A_Base, Layout)); + Value -= (!B_SD.getFragment() ? 0 : Writer->getSymbolAddress(*B, Layout)) - + (!B_Base || !B_Base->getData().getFragment() ? 0 - : Writer->getSymbolAddress(B_Base, Layout)); + : Writer->getSymbolAddress(*B_Base, Layout)); - Index = A_Base->getIndex(); - IsExtern = 1; Type = MachO::ARM64_RELOC_UNSIGNED; MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); + MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28); + Writer->addRelocation(A_Base, Fragment->getParent(), MRE); - Index = B_Base->getIndex(); - IsExtern = 1; + RelSymbol = B_Base; Type = MachO::ARM64_RELOC_SUBTRACTOR; } else { // A + constant const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); - const MCSymbolData &SD = Asm.getSymbolData(*Symbol); - const MCSymbolData *Base = Asm.getAtom(&SD); - const MCSectionMachO &Section = static_cast<const MCSectionMachO &>( - Fragment->getParent()->getSection()); + const MCSectionMachO &Section = + static_cast<const MCSectionMachO &>(*Fragment->getParent()); + + bool CanUseLocalRelocation = + canUseLocalRelocation(Section, *Symbol, Log2Size); + if (Symbol->isTemporary() && (Value || !CanUseLocalRelocation)) { + const MCSection &Sec = Symbol->getSection(); + if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec)) + Asm.addLocalUsedInReloc(*Symbol); + } + + const MCSymbol *Base = Asm.getAtom(*Symbol); // If the symbol is a variable and we weren't able to get a Base for it // (i.e., it's not in the symbol table associated with a section) resolve @@ -279,7 +304,7 @@ void AArch64MachObjectWriter::RecordRelocation( // If the evaluation is an absolute value, just use that directly // to keep things easy. int64_t Res; - if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute( + if (Symbol->getVariableValue()->EvaluateAsAbsolute( Res, Layout, Writer->getSectionAddressMap())) { FixedValue = Res; return; @@ -290,7 +315,7 @@ void AArch64MachObjectWriter::RecordRelocation( // the FixedValue? if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout, &Fixup)) - Asm.getContext().FatalError(Fixup.getLoc(), + Asm.getContext().reportFatalError(Fixup.getLoc(), "unable to resolve variable '" + Symbol->getName() + "'"); return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, @@ -310,42 +335,38 @@ void AArch64MachObjectWriter::RecordRelocation( // sections, and for pointer-sized relocations (.quad), we allow section // relocations. It's code sections that run into trouble. if (Base) { - Index = Base->getIndex(); - IsExtern = 1; + RelSymbol = Base; // Add the local offset, if needed. 
- if (Base != &SD) - Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); + if (Base != Symbol) + Value += + Layout.getSymbolOffset(*Symbol) - Layout.getSymbolOffset(*Base); } else if (Symbol->isInSection()) { - // Pointer-sized relocations can use a local relocation. Otherwise, - // we have to be in a debug info section. - if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3) - Asm.getContext().FatalError( + if (!CanUseLocalRelocation) + Asm.getContext().reportFatalError( Fixup.getLoc(), "unsupported relocation of local symbol '" + Symbol->getName() + "'. Must have non-local symbol earlier in section."); // Adjust the relocation to be section-relative. // The index is the section ordinal (1-based). - const MCSectionData &SymSD = - Asm.getSectionData(SD.getSymbol().getSection()); - Index = SymSD.getOrdinal() + 1; - IsExtern = 0; - Value += Writer->getSymbolAddress(&SD, Layout); + const MCSection &Sec = Symbol->getSection(); + Index = Sec.getOrdinal() + 1; + Value += Writer->getSymbolAddress(*Symbol, Layout); if (IsPCRel) Value -= Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset() + (1ULL << Log2Size); } else { // Resolve constant variables. - if (SD.getSymbol().isVariable()) { + if (Symbol->isVariable()) { int64_t Res; - if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute( + if (Symbol->getVariableValue()->EvaluateAsAbsolute( Res, Layout, Writer->getSectionAddressMap())) { FixedValue = Res; return; } } - Asm.getContext().FatalError(Fixup.getLoc(), + Asm.getContext().reportFatalError(Fixup.getLoc(), "unsupported relocation of variable '" + Symbol->getName() + "'"); } @@ -362,16 +383,16 @@ void AArch64MachObjectWriter::RecordRelocation( MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); + MRE.r_word1 = + (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28); + Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); // Now set up the Addend relocation. Type = MachO::ARM64_RELOC_ADDEND; Index = Value; + RelSymbol = nullptr; IsPCRel = 0; Log2Size = 2; - IsExtern = 0; // Put zero into the instruction itself. The addend is in the relocation. 
   Value = 0;
@@ -383,14 +404,14 @@
 
   // struct relocation_info (8 bytes)
   MachO::any_relocation_info MRE;
   MRE.r_word0 = FixupOffset;
-  MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
-                 (IsExtern << 27) | (Type << 28));
-  Writer->addRelocation(Fragment->getParent(), MRE);
+  MRE.r_word1 =
+      (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+  Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
 }
 
-MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS,
-                                                    uint32_t CPUType,
-                                                    uint32_t CPUSubtype) {
+MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_pwrite_stream &OS,
+                                                    uint32_t CPUType,
+                                                    uint32_t CPUSubtype) {
   return createMachObjectWriter(
       new AArch64MachObjectWriter(CPUType, CPUSubtype), OS,
       /*IsLittleEndian=*/true);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index e3112fa..52b000d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -1,4 +1,4 @@
-//===- AArch64TargetStreamer.cpp - AArch64TargetStreamer class --*- C++ -*---------===//
+//===- AArch64TargetStreamer.cpp - AArch64TargetStreamer class ------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -10,12 +10,9 @@
 // This file implements the AArch64TargetStreamer class.
 //
 //===----------------------------------------------------------------------===//
-#include "llvm/ADT/MapVector.h"
-#include "llvm/MC/ConstantPools.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCStreamer.h"
+#include "AArch64TargetStreamer.h"
+#include "llvm/MC/ConstantPools.h"
 using namespace llvm;
 
 //
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
new file mode 100644
index 0000000..fcc0d05
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -0,0 +1,42 @@
+//===-- AArch64TargetStreamer.h - AArch64 Target Streamer ------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64TARGETSTREAMER_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64TARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class AArch64TargetStreamer : public MCTargetStreamer {
+public:
+  AArch64TargetStreamer(MCStreamer &S);
+  ~AArch64TargetStreamer() override;
+
+  void finish() override;
+
+  /// Callback used to implement the ldr= pseudo.
+  /// Add a new entry to the constant pool for the current section and return an
+  /// MCExpr that can be used to refer to the constant pool location.
+  const MCExpr *addConstantPoolEntry(const MCExpr *, unsigned Size);
+
+  /// Callback used to implement the .ltorg directive.
+  /// Emit contents of constant pool for the current section.
+  void emitCurrentConstantPool();
+
+  /// Callback used to implement the .inst directive.
+ virtual void emitInst(uint32_t Inst); + +private: + std::unique_ptr<AssemblerConstantPools> ConstantPools; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index bc6c7a9..28b8e7e 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -18,11 +18,12 @@ using namespace llvm; -StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Value == Value) { +StringRef AArch64NamedImmMapper::toString(uint32_t Value, + const FeatureBitset& FeatureBits, bool &Valid) const { + for (unsigned i = 0; i < NumMappings; ++i) { + if (Mappings[i].isValueEqual(Value, FeatureBits)) { Valid = true; - return Pairs[i].Name; + return Mappings[i].Name; } } @@ -30,12 +31,13 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { return StringRef(); } -uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { +uint32_t AArch64NamedImmMapper::fromString(StringRef Name, + const FeatureBitset& FeatureBits, bool &Valid) const { std::string LowerCaseName = Name.lower(); - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Name == LowerCaseName) { + for (unsigned i = 0; i < NumMappings; ++i) { + if (Mappings[i].isNameEqual(LowerCaseName, FeatureBits)) { Valid = true; - return Pairs[i].Value; + return Mappings[i].Value; } } @@ -47,747 +49,781 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const { return Value < TooBigImm; } -const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = { - {"s1e1r", S1E1R}, - {"s1e2r", S1E2R}, - {"s1e3r", S1E3R}, - {"s1e1w", S1E1W}, - {"s1e2w", S1E2W}, - {"s1e3w", S1E3W}, - {"s1e0r", S1E0R}, - {"s1e0w", S1E0W}, - {"s12e1r", S12E1R}, - {"s12e1w", S12E1W}, - {"s12e0r", S12E0R}, - {"s12e0w", S12E0W}, +const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = { + {"s1e1r", S1E1R, {}}, + {"s1e2r", S1E2R, {}}, + {"s1e3r", S1E3R, {}}, + {"s1e1w", S1E1W, {}}, + {"s1e2w", S1E2W, {}}, + {"s1e3w", S1E3W, {}}, + {"s1e0r", S1E0R, {}}, + {"s1e0w", S1E0W, {}}, + {"s12e1r", S12E1R, {}}, + {"s12e1w", S12E1W, {}}, + {"s12e0r", S12E0R, {}}, + {"s12e0w", S12E0W, {}}, }; AArch64AT::ATMapper::ATMapper() - : AArch64NamedImmMapper(ATPairs, 0) {} - -const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = { - {"oshld", OSHLD}, - {"oshst", OSHST}, - {"osh", OSH}, - {"nshld", NSHLD}, - {"nshst", NSHST}, - {"nsh", NSH}, - {"ishld", ISHLD}, - {"ishst", ISHST}, - {"ish", ISH}, - {"ld", LD}, - {"st", ST}, - {"sy", SY} + : AArch64NamedImmMapper(ATMappings, 0) {} + +const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = { + {"oshld", OSHLD, {}}, + {"oshst", OSHST, {}}, + {"osh", OSH, {}}, + {"nshld", NSHLD, {}}, + {"nshst", NSHST, {}}, + {"nsh", NSH, {}}, + {"ishld", ISHLD, {}}, + {"ishst", ISHST, {}}, + {"ish", ISH, {}}, + {"ld", LD, {}}, + {"st", ST, {}}, + {"sy", SY, {}} }; AArch64DB::DBarrierMapper::DBarrierMapper() - : AArch64NamedImmMapper(DBarrierPairs, 16u) {} - -const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = { - {"zva", ZVA}, - {"ivac", IVAC}, - {"isw", ISW}, - {"cvac", CVAC}, - {"csw", CSW}, - {"cvau", CVAU}, - {"civac", CIVAC}, - {"cisw", CISW} + : AArch64NamedImmMapper(DBarrierMappings, 16u) {} + +const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = { + {"zva", ZVA, {}}, + {"ivac", IVAC, {}}, + {"isw", 
ISW, {}}, + {"cvac", CVAC, {}}, + {"csw", CSW, {}}, + {"cvau", CVAU, {}}, + {"civac", CIVAC, {}}, + {"cisw", CISW, {}} }; AArch64DC::DCMapper::DCMapper() - : AArch64NamedImmMapper(DCPairs, 0) {} + : AArch64NamedImmMapper(DCMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = { - {"ialluis", IALLUIS}, - {"iallu", IALLU}, - {"ivau", IVAU} +const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = { + {"ialluis", IALLUIS, {}}, + {"iallu", IALLU, {}}, + {"ivau", IVAU, {}} }; AArch64IC::ICMapper::ICMapper() - : AArch64NamedImmMapper(ICPairs, 0) {} + : AArch64NamedImmMapper(ICMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = { - {"sy", SY}, +const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = { + {"sy", SY, {}}, }; AArch64ISB::ISBMapper::ISBMapper() - : AArch64NamedImmMapper(ISBPairs, 16) {} - -const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = { - {"pldl1keep", PLDL1KEEP}, - {"pldl1strm", PLDL1STRM}, - {"pldl2keep", PLDL2KEEP}, - {"pldl2strm", PLDL2STRM}, - {"pldl3keep", PLDL3KEEP}, - {"pldl3strm", PLDL3STRM}, - {"plil1keep", PLIL1KEEP}, - {"plil1strm", PLIL1STRM}, - {"plil2keep", PLIL2KEEP}, - {"plil2strm", PLIL2STRM}, - {"plil3keep", PLIL3KEEP}, - {"plil3strm", PLIL3STRM}, - {"pstl1keep", PSTL1KEEP}, - {"pstl1strm", PSTL1STRM}, - {"pstl2keep", PSTL2KEEP}, - {"pstl2strm", PSTL2STRM}, - {"pstl3keep", PSTL3KEEP}, - {"pstl3strm", PSTL3STRM} + : AArch64NamedImmMapper(ISBMappings, 16) {} + +const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = { + {"pldl1keep", PLDL1KEEP, {}}, + {"pldl1strm", PLDL1STRM, {}}, + {"pldl2keep", PLDL2KEEP, {}}, + {"pldl2strm", PLDL2STRM, {}}, + {"pldl3keep", PLDL3KEEP, {}}, + {"pldl3strm", PLDL3STRM, {}}, + {"plil1keep", PLIL1KEEP, {}}, + {"plil1strm", PLIL1STRM, {}}, + {"plil2keep", PLIL2KEEP, {}}, + {"plil2strm", PLIL2STRM, {}}, + {"plil3keep", PLIL3KEEP, {}}, + {"plil3strm", PLIL3STRM, {}}, + {"pstl1keep", PSTL1KEEP, {}}, + {"pstl1strm", PSTL1STRM, {}}, + {"pstl2keep", PSTL2KEEP, {}}, + {"pstl2strm", PSTL2STRM, {}}, + {"pstl3keep", PSTL3KEEP, {}}, + {"pstl3strm", PSTL3STRM, {}} }; AArch64PRFM::PRFMMapper::PRFMMapper() - : AArch64NamedImmMapper(PRFMPairs, 32) {} + : AArch64NamedImmMapper(PRFMMappings, 32) {} -const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = { - {"spsel", SPSel}, - {"daifset", DAIFSet}, - {"daifclr", DAIFClr} +const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = { + {"spsel", SPSel, {}}, + {"daifset", DAIFSet, {}}, + {"daifclr", DAIFClr, {}}, + + // v8.1a "Privileged Access Never" extension-specific PStates + {"pan", PAN, {AArch64::HasV8_1aOps}}, }; AArch64PState::PStateMapper::PStateMapper() - : AArch64NamedImmMapper(PStatePairs, 0) {} - -const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = { - {"mdccsr_el0", MDCCSR_EL0}, - {"dbgdtrrx_el0", DBGDTRRX_EL0}, - {"mdrar_el1", MDRAR_EL1}, - {"oslsr_el1", OSLSR_EL1}, - {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1}, - {"pmceid0_el0", PMCEID0_EL0}, - {"pmceid1_el0", PMCEID1_EL0}, - {"midr_el1", MIDR_EL1}, - {"ccsidr_el1", CCSIDR_EL1}, - {"clidr_el1", CLIDR_EL1}, - {"ctr_el0", CTR_EL0}, - {"mpidr_el1", MPIDR_EL1}, - {"revidr_el1", REVIDR_EL1}, - {"aidr_el1", AIDR_EL1}, - {"dczid_el0", DCZID_EL0}, - {"id_pfr0_el1", ID_PFR0_EL1}, - {"id_pfr1_el1", ID_PFR1_EL1}, - {"id_dfr0_el1", ID_DFR0_EL1}, - {"id_afr0_el1", ID_AFR0_EL1}, - {"id_mmfr0_el1", ID_MMFR0_EL1}, - 
{"id_mmfr1_el1", ID_MMFR1_EL1}, - {"id_mmfr2_el1", ID_MMFR2_EL1}, - {"id_mmfr3_el1", ID_MMFR3_EL1}, - {"id_isar0_el1", ID_ISAR0_EL1}, - {"id_isar1_el1", ID_ISAR1_EL1}, - {"id_isar2_el1", ID_ISAR2_EL1}, - {"id_isar3_el1", ID_ISAR3_EL1}, - {"id_isar4_el1", ID_ISAR4_EL1}, - {"id_isar5_el1", ID_ISAR5_EL1}, - {"id_aa64pfr0_el1", ID_A64PFR0_EL1}, - {"id_aa64pfr1_el1", ID_A64PFR1_EL1}, - {"id_aa64dfr0_el1", ID_A64DFR0_EL1}, - {"id_aa64dfr1_el1", ID_A64DFR1_EL1}, - {"id_aa64afr0_el1", ID_A64AFR0_EL1}, - {"id_aa64afr1_el1", ID_A64AFR1_EL1}, - {"id_aa64isar0_el1", ID_A64ISAR0_EL1}, - {"id_aa64isar1_el1", ID_A64ISAR1_EL1}, - {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1}, - {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1}, - {"mvfr0_el1", MVFR0_EL1}, - {"mvfr1_el1", MVFR1_EL1}, - {"mvfr2_el1", MVFR2_EL1}, - {"rvbar_el1", RVBAR_EL1}, - {"rvbar_el2", RVBAR_EL2}, - {"rvbar_el3", RVBAR_EL3}, - {"isr_el1", ISR_EL1}, - {"cntpct_el0", CNTPCT_EL0}, - {"cntvct_el0", CNTVCT_EL0}, + : AArch64NamedImmMapper(PStateMappings, 0) {} + +const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { + {"mdccsr_el0", MDCCSR_EL0, {}}, + {"dbgdtrrx_el0", DBGDTRRX_EL0, {}}, + {"mdrar_el1", MDRAR_EL1, {}}, + {"oslsr_el1", OSLSR_EL1, {}}, + {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1, {}}, + {"pmceid0_el0", PMCEID0_EL0, {}}, + {"pmceid1_el0", PMCEID1_EL0, {}}, + {"midr_el1", MIDR_EL1, {}}, + {"ccsidr_el1", CCSIDR_EL1, {}}, + {"clidr_el1", CLIDR_EL1, {}}, + {"ctr_el0", CTR_EL0, {}}, + {"mpidr_el1", MPIDR_EL1, {}}, + {"revidr_el1", REVIDR_EL1, {}}, + {"aidr_el1", AIDR_EL1, {}}, + {"dczid_el0", DCZID_EL0, {}}, + {"id_pfr0_el1", ID_PFR0_EL1, {}}, + {"id_pfr1_el1", ID_PFR1_EL1, {}}, + {"id_dfr0_el1", ID_DFR0_EL1, {}}, + {"id_afr0_el1", ID_AFR0_EL1, {}}, + {"id_mmfr0_el1", ID_MMFR0_EL1, {}}, + {"id_mmfr1_el1", ID_MMFR1_EL1, {}}, + {"id_mmfr2_el1", ID_MMFR2_EL1, {}}, + {"id_mmfr3_el1", ID_MMFR3_EL1, {}}, + {"id_isar0_el1", ID_ISAR0_EL1, {}}, + {"id_isar1_el1", ID_ISAR1_EL1, {}}, + {"id_isar2_el1", ID_ISAR2_EL1, {}}, + {"id_isar3_el1", ID_ISAR3_EL1, {}}, + {"id_isar4_el1", ID_ISAR4_EL1, {}}, + {"id_isar5_el1", ID_ISAR5_EL1, {}}, + {"id_aa64pfr0_el1", ID_A64PFR0_EL1, {}}, + {"id_aa64pfr1_el1", ID_A64PFR1_EL1, {}}, + {"id_aa64dfr0_el1", ID_A64DFR0_EL1, {}}, + {"id_aa64dfr1_el1", ID_A64DFR1_EL1, {}}, + {"id_aa64afr0_el1", ID_A64AFR0_EL1, {}}, + {"id_aa64afr1_el1", ID_A64AFR1_EL1, {}}, + {"id_aa64isar0_el1", ID_A64ISAR0_EL1, {}}, + {"id_aa64isar1_el1", ID_A64ISAR1_EL1, {}}, + {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1, {}}, + {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1, {}}, + {"mvfr0_el1", MVFR0_EL1, {}}, + {"mvfr1_el1", MVFR1_EL1, {}}, + {"mvfr2_el1", MVFR2_EL1, {}}, + {"rvbar_el1", RVBAR_EL1, {}}, + {"rvbar_el2", RVBAR_EL2, {}}, + {"rvbar_el3", RVBAR_EL3, {}}, + {"isr_el1", ISR_EL1, {}}, + {"cntpct_el0", CNTPCT_EL0, {}}, + {"cntvct_el0", CNTVCT_EL0, {}}, // Trace registers - {"trcstatr", TRCSTATR}, - {"trcidr8", TRCIDR8}, - {"trcidr9", TRCIDR9}, - {"trcidr10", TRCIDR10}, - {"trcidr11", TRCIDR11}, - {"trcidr12", TRCIDR12}, - {"trcidr13", TRCIDR13}, - {"trcidr0", TRCIDR0}, - {"trcidr1", TRCIDR1}, - {"trcidr2", TRCIDR2}, - {"trcidr3", TRCIDR3}, - {"trcidr4", TRCIDR4}, - {"trcidr5", TRCIDR5}, - {"trcidr6", TRCIDR6}, - {"trcidr7", TRCIDR7}, - {"trcoslsr", TRCOSLSR}, - {"trcpdsr", TRCPDSR}, - {"trcdevaff0", TRCDEVAFF0}, - {"trcdevaff1", TRCDEVAFF1}, - {"trclsr", TRCLSR}, - {"trcauthstatus", TRCAUTHSTATUS}, - {"trcdevarch", TRCDEVARCH}, - {"trcdevid", TRCDEVID}, - {"trcdevtype", TRCDEVTYPE}, - {"trcpidr4", TRCPIDR4}, - {"trcpidr5", TRCPIDR5}, - {"trcpidr6", 
- {"trcpidr6", TRCPIDR6},
- {"trcpidr7", TRCPIDR7},
- {"trcpidr0", TRCPIDR0},
- {"trcpidr1", TRCPIDR1},
- {"trcpidr2", TRCPIDR2},
- {"trcpidr3", TRCPIDR3},
- {"trccidr0", TRCCIDR0},
- {"trccidr1", TRCCIDR1},
- {"trccidr2", TRCCIDR2},
- {"trccidr3", TRCCIDR3},
+ {"trcstatr", TRCSTATR, {}},
+ {"trcidr8", TRCIDR8, {}},
+ {"trcidr9", TRCIDR9, {}},
+ {"trcidr10", TRCIDR10, {}},
+ {"trcidr11", TRCIDR11, {}},
+ {"trcidr12", TRCIDR12, {}},
+ {"trcidr13", TRCIDR13, {}},
+ {"trcidr0", TRCIDR0, {}},
+ {"trcidr1", TRCIDR1, {}},
+ {"trcidr2", TRCIDR2, {}},
+ {"trcidr3", TRCIDR3, {}},
+ {"trcidr4", TRCIDR4, {}},
+ {"trcidr5", TRCIDR5, {}},
+ {"trcidr6", TRCIDR6, {}},
+ {"trcidr7", TRCIDR7, {}},
+ {"trcoslsr", TRCOSLSR, {}},
+ {"trcpdsr", TRCPDSR, {}},
+ {"trcdevaff0", TRCDEVAFF0, {}},
+ {"trcdevaff1", TRCDEVAFF1, {}},
+ {"trclsr", TRCLSR, {}},
+ {"trcauthstatus", TRCAUTHSTATUS, {}},
+ {"trcdevarch", TRCDEVARCH, {}},
+ {"trcdevid", TRCDEVID, {}},
+ {"trcdevtype", TRCDEVTYPE, {}},
+ {"trcpidr4", TRCPIDR4, {}},
+ {"trcpidr5", TRCPIDR5, {}},
+ {"trcpidr6", TRCPIDR6, {}},
+ {"trcpidr7", TRCPIDR7, {}},
+ {"trcpidr0", TRCPIDR0, {}},
+ {"trcpidr1", TRCPIDR1, {}},
+ {"trcpidr2", TRCPIDR2, {}},
+ {"trcpidr3", TRCPIDR3, {}},
+ {"trccidr0", TRCCIDR0, {}},
+ {"trccidr1", TRCCIDR1, {}},
+ {"trccidr2", TRCCIDR2, {}},
+ {"trccidr3", TRCCIDR3, {}},
  // GICv3 registers
- {"icc_iar1_el1", ICC_IAR1_EL1},
- {"icc_iar0_el1", ICC_IAR0_EL1},
- {"icc_hppir1_el1", ICC_HPPIR1_EL1},
- {"icc_hppir0_el1", ICC_HPPIR0_EL1},
- {"icc_rpr_el1", ICC_RPR_EL1},
- {"ich_vtr_el2", ICH_VTR_EL2},
- {"ich_eisr_el2", ICH_EISR_EL2},
- {"ich_elsr_el2", ICH_ELSR_EL2}
+ {"icc_iar1_el1", ICC_IAR1_EL1, {}},
+ {"icc_iar0_el1", ICC_IAR0_EL1, {}},
+ {"icc_hppir1_el1", ICC_HPPIR1_EL1, {}},
+ {"icc_hppir0_el1", ICC_HPPIR0_EL1, {}},
+ {"icc_rpr_el1", ICC_RPR_EL1, {}},
+ {"ich_vtr_el2", ICH_VTR_EL2, {}},
+ {"ich_eisr_el2", ICH_EISR_EL2, {}},
+ {"ich_elsr_el2", ICH_ELSR_EL2, {}},
+
+ // v8.1a "Limited Ordering Regions" extension-specific system registers
+ {"lorid_el1", LORID_EL1, {AArch64::HasV8_1aOps}},
};
-AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits)
-  : SysRegMapper(FeatureBits) {
-  InstPairs = &MRSPairs[0];
-  NumInstPairs = llvm::array_lengthof(MRSPairs);
+AArch64SysReg::MRSMapper::MRSMapper() {
+  InstMappings = &MRSMappings[0];
+  NumInstMappings = llvm::array_lengthof(MRSMappings);
}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = {
- {"dbgdtrtx_el0", DBGDTRTX_EL0},
- {"oslar_el1", OSLAR_EL1},
- {"pmswinc_el0", PMSWINC_EL0},
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = {
+ {"dbgdtrtx_el0", DBGDTRTX_EL0, {}},
+ {"oslar_el1", OSLAR_EL1, {}},
+ {"pmswinc_el0", PMSWINC_EL0, {}},
  // Trace registers
- {"trcoslar", TRCOSLAR},
- {"trclar", TRCLAR},
+ {"trcoslar", TRCOSLAR, {}},
+ {"trclar", TRCLAR, {}},
  // GICv3 registers
- {"icc_eoir1_el1", ICC_EOIR1_EL1},
- {"icc_eoir0_el1", ICC_EOIR0_EL1},
- {"icc_dir_el1", ICC_DIR_EL1},
- {"icc_sgi1r_el1", ICC_SGI1R_EL1},
- {"icc_asgi1r_el1", ICC_ASGI1R_EL1},
- {"icc_sgi0r_el1", ICC_SGI0R_EL1}
+ {"icc_eoir1_el1", ICC_EOIR1_EL1, {}},
+ {"icc_eoir0_el1", ICC_EOIR0_EL1, {}},
+ {"icc_dir_el1", ICC_DIR_EL1, {}},
+ {"icc_sgi1r_el1", ICC_SGI1R_EL1, {}},
+ {"icc_asgi1r_el1", ICC_ASGI1R_EL1, {}},
+ {"icc_sgi0r_el1", ICC_SGI0R_EL1, {}},
+
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ {"pan", PAN, {AArch64::HasV8_1aOps}},
};
-AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits)
-  : SysRegMapper(FeatureBits) {
-  InstPairs = &MSRPairs[0];
-  NumInstPairs = llvm::array_lengthof(MSRPairs);
+AArch64SysReg::MSRMapper::MSRMapper() {
+  InstMappings = &MSRMappings[0];
+  NumInstMappings = llvm::array_lengthof(MSRMappings);
}
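The change above is the core of this patch: instead of the mappers caching a uint64_t FeatureBits mask at construction time, every Mapping row now carries its own FeatureBitset and is simply skipped when the subtarget lacks the required feature. A minimal stand-alone sketch of that per-entry gating follows, using std::bitset as a stand-in for llvm::FeatureBitset; the names below are illustrative, not LLVM API:

#include <bitset>
#include <cstdint>
#include <string>

using FeatureBits = std::bitset<64>; // stand-in for llvm::FeatureBitset

struct GatedMapping {
  const char *Name;
  uint32_t Value;
  FeatureBits Required; // empty set == available on every subtarget

  // Mirrors Mapping::isNameEqual from this diff: a gated entry only
  // matches when the subtarget provides one of the required features.
  bool isNameEqual(const std::string &Other, const FeatureBits &Avail) const {
    if (Required.any() && (Required & Avail).none())
      return false;
    return Name == Other;
  }
};

Applying the same predicate uniformly to the shared and instruction-specific tables is what lets the v8.1a-only entries ("pan", "lorid_el1", the LOR and VHE registers) and the Cyclone-only cpm_ioacc_ctl_el3 live in the common arrays that follow, instead of a separate CycloneSysRegPairs table.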
{"dacr32_el2", DACR32_EL2}, - {"spsr_el1", SPSR_EL1}, - {"spsr_el2", SPSR_EL2}, - {"spsr_el3", SPSR_EL3}, - {"elr_el1", ELR_EL1}, - {"elr_el2", ELR_EL2}, - {"elr_el3", ELR_EL3}, - {"sp_el0", SP_EL0}, - {"sp_el1", SP_EL1}, - {"sp_el2", SP_EL2}, - {"spsel", SPSel}, - {"nzcv", NZCV}, - {"daif", DAIF}, - {"currentel", CurrentEL}, - {"spsr_irq", SPSR_irq}, - {"spsr_abt", SPSR_abt}, - {"spsr_und", SPSR_und}, - {"spsr_fiq", SPSR_fiq}, - {"fpcr", FPCR}, - {"fpsr", FPSR}, - {"dspsr_el0", DSPSR_EL0}, - {"dlr_el0", DLR_EL0}, - {"ifsr32_el2", IFSR32_EL2}, - {"afsr0_el1", AFSR0_EL1}, - {"afsr0_el2", AFSR0_EL2}, - {"afsr0_el3", AFSR0_EL3}, - {"afsr1_el1", AFSR1_EL1}, - {"afsr1_el2", AFSR1_EL2}, - {"afsr1_el3", AFSR1_EL3}, - {"esr_el1", ESR_EL1}, - {"esr_el2", ESR_EL2}, - {"esr_el3", ESR_EL3}, - {"fpexc32_el2", FPEXC32_EL2}, - {"far_el1", FAR_EL1}, - {"far_el2", FAR_EL2}, - {"far_el3", FAR_EL3}, - {"hpfar_el2", HPFAR_EL2}, - {"par_el1", PAR_EL1}, - {"pmcr_el0", PMCR_EL0}, - {"pmcntenset_el0", PMCNTENSET_EL0}, - {"pmcntenclr_el0", PMCNTENCLR_EL0}, - {"pmovsclr_el0", PMOVSCLR_EL0}, - {"pmselr_el0", PMSELR_EL0}, - {"pmccntr_el0", PMCCNTR_EL0}, - {"pmxevtyper_el0", PMXEVTYPER_EL0}, - {"pmxevcntr_el0", PMXEVCNTR_EL0}, - {"pmuserenr_el0", PMUSERENR_EL0}, - {"pmintenset_el1", PMINTENSET_EL1}, - {"pmintenclr_el1", PMINTENCLR_EL1}, - {"pmovsset_el0", PMOVSSET_EL0}, - {"mair_el1", MAIR_EL1}, - {"mair_el2", MAIR_EL2}, - {"mair_el3", MAIR_EL3}, - {"amair_el1", AMAIR_EL1}, - {"amair_el2", AMAIR_EL2}, - {"amair_el3", AMAIR_EL3}, - {"vbar_el1", VBAR_EL1}, - {"vbar_el2", VBAR_EL2}, - {"vbar_el3", VBAR_EL3}, - {"rmr_el1", RMR_EL1}, - {"rmr_el2", RMR_EL2}, - {"rmr_el3", RMR_EL3}, - {"contextidr_el1", CONTEXTIDR_EL1}, - {"tpidr_el0", TPIDR_EL0}, - {"tpidr_el2", TPIDR_EL2}, - {"tpidr_el3", TPIDR_EL3}, - {"tpidrro_el0", TPIDRRO_EL0}, - {"tpidr_el1", TPIDR_EL1}, - {"cntfrq_el0", CNTFRQ_EL0}, - {"cntvoff_el2", CNTVOFF_EL2}, - {"cntkctl_el1", CNTKCTL_EL1}, - {"cnthctl_el2", CNTHCTL_EL2}, - {"cntp_tval_el0", CNTP_TVAL_EL0}, - {"cnthp_tval_el2", CNTHP_TVAL_EL2}, - {"cntps_tval_el1", CNTPS_TVAL_EL1}, - {"cntp_ctl_el0", CNTP_CTL_EL0}, - {"cnthp_ctl_el2", CNTHP_CTL_EL2}, - {"cntps_ctl_el1", CNTPS_CTL_EL1}, - {"cntp_cval_el0", CNTP_CVAL_EL0}, - {"cnthp_cval_el2", CNTHP_CVAL_EL2}, - {"cntps_cval_el1", CNTPS_CVAL_EL1}, - {"cntv_tval_el0", CNTV_TVAL_EL0}, - {"cntv_ctl_el0", CNTV_CTL_EL0}, - {"cntv_cval_el0", CNTV_CVAL_EL0}, - {"pmevcntr0_el0", PMEVCNTR0_EL0}, - {"pmevcntr1_el0", PMEVCNTR1_EL0}, - {"pmevcntr2_el0", PMEVCNTR2_EL0}, - {"pmevcntr3_el0", PMEVCNTR3_EL0}, - {"pmevcntr4_el0", PMEVCNTR4_EL0}, - {"pmevcntr5_el0", PMEVCNTR5_EL0}, - {"pmevcntr6_el0", PMEVCNTR6_EL0}, - {"pmevcntr7_el0", PMEVCNTR7_EL0}, - {"pmevcntr8_el0", PMEVCNTR8_EL0}, - {"pmevcntr9_el0", PMEVCNTR9_EL0}, - {"pmevcntr10_el0", PMEVCNTR10_EL0}, - {"pmevcntr11_el0", PMEVCNTR11_EL0}, - {"pmevcntr12_el0", PMEVCNTR12_EL0}, - {"pmevcntr13_el0", PMEVCNTR13_EL0}, - {"pmevcntr14_el0", PMEVCNTR14_EL0}, - {"pmevcntr15_el0", PMEVCNTR15_EL0}, - {"pmevcntr16_el0", PMEVCNTR16_EL0}, - {"pmevcntr17_el0", PMEVCNTR17_EL0}, - {"pmevcntr18_el0", PMEVCNTR18_EL0}, - {"pmevcntr19_el0", PMEVCNTR19_EL0}, - {"pmevcntr20_el0", PMEVCNTR20_EL0}, - {"pmevcntr21_el0", PMEVCNTR21_EL0}, - {"pmevcntr22_el0", PMEVCNTR22_EL0}, - {"pmevcntr23_el0", PMEVCNTR23_EL0}, - {"pmevcntr24_el0", PMEVCNTR24_EL0}, - {"pmevcntr25_el0", PMEVCNTR25_EL0}, - {"pmevcntr26_el0", PMEVCNTR26_EL0}, - {"pmevcntr27_el0", PMEVCNTR27_EL0}, - {"pmevcntr28_el0", PMEVCNTR28_EL0}, - {"pmevcntr29_el0", PMEVCNTR29_EL0}, - 
{"pmevcntr30_el0", PMEVCNTR30_EL0}, - {"pmccfiltr_el0", PMCCFILTR_EL0}, - {"pmevtyper0_el0", PMEVTYPER0_EL0}, - {"pmevtyper1_el0", PMEVTYPER1_EL0}, - {"pmevtyper2_el0", PMEVTYPER2_EL0}, - {"pmevtyper3_el0", PMEVTYPER3_EL0}, - {"pmevtyper4_el0", PMEVTYPER4_EL0}, - {"pmevtyper5_el0", PMEVTYPER5_EL0}, - {"pmevtyper6_el0", PMEVTYPER6_EL0}, - {"pmevtyper7_el0", PMEVTYPER7_EL0}, - {"pmevtyper8_el0", PMEVTYPER8_EL0}, - {"pmevtyper9_el0", PMEVTYPER9_EL0}, - {"pmevtyper10_el0", PMEVTYPER10_EL0}, - {"pmevtyper11_el0", PMEVTYPER11_EL0}, - {"pmevtyper12_el0", PMEVTYPER12_EL0}, - {"pmevtyper13_el0", PMEVTYPER13_EL0}, - {"pmevtyper14_el0", PMEVTYPER14_EL0}, - {"pmevtyper15_el0", PMEVTYPER15_EL0}, - {"pmevtyper16_el0", PMEVTYPER16_EL0}, - {"pmevtyper17_el0", PMEVTYPER17_EL0}, - {"pmevtyper18_el0", PMEVTYPER18_EL0}, - {"pmevtyper19_el0", PMEVTYPER19_EL0}, - {"pmevtyper20_el0", PMEVTYPER20_EL0}, - {"pmevtyper21_el0", PMEVTYPER21_EL0}, - {"pmevtyper22_el0", PMEVTYPER22_EL0}, - {"pmevtyper23_el0", PMEVTYPER23_EL0}, - {"pmevtyper24_el0", PMEVTYPER24_EL0}, - {"pmevtyper25_el0", PMEVTYPER25_EL0}, - {"pmevtyper26_el0", PMEVTYPER26_EL0}, - {"pmevtyper27_el0", PMEVTYPER27_EL0}, - {"pmevtyper28_el0", PMEVTYPER28_EL0}, - {"pmevtyper29_el0", PMEVTYPER29_EL0}, - {"pmevtyper30_el0", PMEVTYPER30_EL0}, +const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = { + {"osdtrrx_el1", OSDTRRX_EL1, {}}, + {"osdtrtx_el1", OSDTRTX_EL1, {}}, + {"teecr32_el1", TEECR32_EL1, {}}, + {"mdccint_el1", MDCCINT_EL1, {}}, + {"mdscr_el1", MDSCR_EL1, {}}, + {"dbgdtr_el0", DBGDTR_EL0, {}}, + {"oseccr_el1", OSECCR_EL1, {}}, + {"dbgvcr32_el2", DBGVCR32_EL2, {}}, + {"dbgbvr0_el1", DBGBVR0_EL1, {}}, + {"dbgbvr1_el1", DBGBVR1_EL1, {}}, + {"dbgbvr2_el1", DBGBVR2_EL1, {}}, + {"dbgbvr3_el1", DBGBVR3_EL1, {}}, + {"dbgbvr4_el1", DBGBVR4_EL1, {}}, + {"dbgbvr5_el1", DBGBVR5_EL1, {}}, + {"dbgbvr6_el1", DBGBVR6_EL1, {}}, + {"dbgbvr7_el1", DBGBVR7_EL1, {}}, + {"dbgbvr8_el1", DBGBVR8_EL1, {}}, + {"dbgbvr9_el1", DBGBVR9_EL1, {}}, + {"dbgbvr10_el1", DBGBVR10_EL1, {}}, + {"dbgbvr11_el1", DBGBVR11_EL1, {}}, + {"dbgbvr12_el1", DBGBVR12_EL1, {}}, + {"dbgbvr13_el1", DBGBVR13_EL1, {}}, + {"dbgbvr14_el1", DBGBVR14_EL1, {}}, + {"dbgbvr15_el1", DBGBVR15_EL1, {}}, + {"dbgbcr0_el1", DBGBCR0_EL1, {}}, + {"dbgbcr1_el1", DBGBCR1_EL1, {}}, + {"dbgbcr2_el1", DBGBCR2_EL1, {}}, + {"dbgbcr3_el1", DBGBCR3_EL1, {}}, + {"dbgbcr4_el1", DBGBCR4_EL1, {}}, + {"dbgbcr5_el1", DBGBCR5_EL1, {}}, + {"dbgbcr6_el1", DBGBCR6_EL1, {}}, + {"dbgbcr7_el1", DBGBCR7_EL1, {}}, + {"dbgbcr8_el1", DBGBCR8_EL1, {}}, + {"dbgbcr9_el1", DBGBCR9_EL1, {}}, + {"dbgbcr10_el1", DBGBCR10_EL1, {}}, + {"dbgbcr11_el1", DBGBCR11_EL1, {}}, + {"dbgbcr12_el1", DBGBCR12_EL1, {}}, + {"dbgbcr13_el1", DBGBCR13_EL1, {}}, + {"dbgbcr14_el1", DBGBCR14_EL1, {}}, + {"dbgbcr15_el1", DBGBCR15_EL1, {}}, + {"dbgwvr0_el1", DBGWVR0_EL1, {}}, + {"dbgwvr1_el1", DBGWVR1_EL1, {}}, + {"dbgwvr2_el1", DBGWVR2_EL1, {}}, + {"dbgwvr3_el1", DBGWVR3_EL1, {}}, + {"dbgwvr4_el1", DBGWVR4_EL1, {}}, + {"dbgwvr5_el1", DBGWVR5_EL1, {}}, + {"dbgwvr6_el1", DBGWVR6_EL1, {}}, + {"dbgwvr7_el1", DBGWVR7_EL1, {}}, + {"dbgwvr8_el1", DBGWVR8_EL1, {}}, + {"dbgwvr9_el1", DBGWVR9_EL1, {}}, + {"dbgwvr10_el1", DBGWVR10_EL1, {}}, + {"dbgwvr11_el1", DBGWVR11_EL1, {}}, + {"dbgwvr12_el1", DBGWVR12_EL1, {}}, + {"dbgwvr13_el1", DBGWVR13_EL1, {}}, + {"dbgwvr14_el1", DBGWVR14_EL1, {}}, + {"dbgwvr15_el1", DBGWVR15_EL1, {}}, + {"dbgwcr0_el1", DBGWCR0_EL1, {}}, + {"dbgwcr1_el1", DBGWCR1_EL1, {}}, + {"dbgwcr2_el1", DBGWCR2_EL1, {}}, + {"dbgwcr3_el1", 
+ {"dbgwcr3_el1", DBGWCR3_EL1, {}},
+ {"dbgwcr4_el1", DBGWCR4_EL1, {}},
+ {"dbgwcr5_el1", DBGWCR5_EL1, {}},
+ {"dbgwcr6_el1", DBGWCR6_EL1, {}},
+ {"dbgwcr7_el1", DBGWCR7_EL1, {}},
+ {"dbgwcr8_el1", DBGWCR8_EL1, {}},
+ {"dbgwcr9_el1", DBGWCR9_EL1, {}},
+ {"dbgwcr10_el1", DBGWCR10_EL1, {}},
+ {"dbgwcr11_el1", DBGWCR11_EL1, {}},
+ {"dbgwcr12_el1", DBGWCR12_EL1, {}},
+ {"dbgwcr13_el1", DBGWCR13_EL1, {}},
+ {"dbgwcr14_el1", DBGWCR14_EL1, {}},
+ {"dbgwcr15_el1", DBGWCR15_EL1, {}},
+ {"teehbr32_el1", TEEHBR32_EL1, {}},
+ {"osdlr_el1", OSDLR_EL1, {}},
+ {"dbgprcr_el1", DBGPRCR_EL1, {}},
+ {"dbgclaimset_el1", DBGCLAIMSET_EL1, {}},
+ {"dbgclaimclr_el1", DBGCLAIMCLR_EL1, {}},
+ {"csselr_el1", CSSELR_EL1, {}},
+ {"vpidr_el2", VPIDR_EL2, {}},
+ {"vmpidr_el2", VMPIDR_EL2, {}},
+ {"sctlr_el1", SCTLR_EL1, {}},
+ {"sctlr_el2", SCTLR_EL2, {}},
+ {"sctlr_el3", SCTLR_EL3, {}},
+ {"actlr_el1", ACTLR_EL1, {}},
+ {"actlr_el2", ACTLR_EL2, {}},
+ {"actlr_el3", ACTLR_EL3, {}},
+ {"cpacr_el1", CPACR_EL1, {}},
+ {"hcr_el2", HCR_EL2, {}},
+ {"scr_el3", SCR_EL3, {}},
+ {"mdcr_el2", MDCR_EL2, {}},
+ {"sder32_el3", SDER32_EL3, {}},
+ {"cptr_el2", CPTR_EL2, {}},
+ {"cptr_el3", CPTR_EL3, {}},
+ {"hstr_el2", HSTR_EL2, {}},
+ {"hacr_el2", HACR_EL2, {}},
+ {"mdcr_el3", MDCR_EL3, {}},
+ {"ttbr0_el1", TTBR0_EL1, {}},
+ {"ttbr0_el2", TTBR0_EL2, {}},
+ {"ttbr0_el3", TTBR0_EL3, {}},
+ {"ttbr1_el1", TTBR1_EL1, {}},
+ {"tcr_el1", TCR_EL1, {}},
+ {"tcr_el2", TCR_EL2, {}},
+ {"tcr_el3", TCR_EL3, {}},
+ {"vttbr_el2", VTTBR_EL2, {}},
+ {"vtcr_el2", VTCR_EL2, {}},
+ {"dacr32_el2", DACR32_EL2, {}},
+ {"spsr_el1", SPSR_EL1, {}},
+ {"spsr_el2", SPSR_EL2, {}},
+ {"spsr_el3", SPSR_EL3, {}},
+ {"elr_el1", ELR_EL1, {}},
+ {"elr_el2", ELR_EL2, {}},
+ {"elr_el3", ELR_EL3, {}},
+ {"sp_el0", SP_EL0, {}},
+ {"sp_el1", SP_EL1, {}},
+ {"sp_el2", SP_EL2, {}},
+ {"spsel", SPSel, {}},
+ {"nzcv", NZCV, {}},
+ {"daif", DAIF, {}},
+ {"currentel", CurrentEL, {}},
+ {"spsr_irq", SPSR_irq, {}},
+ {"spsr_abt", SPSR_abt, {}},
+ {"spsr_und", SPSR_und, {}},
+ {"spsr_fiq", SPSR_fiq, {}},
+ {"fpcr", FPCR, {}},
+ {"fpsr", FPSR, {}},
+ {"dspsr_el0", DSPSR_EL0, {}},
+ {"dlr_el0", DLR_EL0, {}},
+ {"ifsr32_el2", IFSR32_EL2, {}},
+ {"afsr0_el1", AFSR0_EL1, {}},
+ {"afsr0_el2", AFSR0_EL2, {}},
+ {"afsr0_el3", AFSR0_EL3, {}},
+ {"afsr1_el1", AFSR1_EL1, {}},
+ {"afsr1_el2", AFSR1_EL2, {}},
+ {"afsr1_el3", AFSR1_EL3, {}},
+ {"esr_el1", ESR_EL1, {}},
+ {"esr_el2", ESR_EL2, {}},
+ {"esr_el3", ESR_EL3, {}},
+ {"fpexc32_el2", FPEXC32_EL2, {}},
+ {"far_el1", FAR_EL1, {}},
+ {"far_el2", FAR_EL2, {}},
+ {"far_el3", FAR_EL3, {}},
+ {"hpfar_el2", HPFAR_EL2, {}},
+ {"par_el1", PAR_EL1, {}},
+ {"pmcr_el0", PMCR_EL0, {}},
+ {"pmcntenset_el0", PMCNTENSET_EL0, {}},
+ {"pmcntenclr_el0", PMCNTENCLR_EL0, {}},
+ {"pmovsclr_el0", PMOVSCLR_EL0, {}},
+ {"pmselr_el0", PMSELR_EL0, {}},
+ {"pmccntr_el0", PMCCNTR_EL0, {}},
+ {"pmxevtyper_el0", PMXEVTYPER_EL0, {}},
+ {"pmxevcntr_el0", PMXEVCNTR_EL0, {}},
+ {"pmuserenr_el0", PMUSERENR_EL0, {}},
+ {"pmintenset_el1", PMINTENSET_EL1, {}},
+ {"pmintenclr_el1", PMINTENCLR_EL1, {}},
+ {"pmovsset_el0", PMOVSSET_EL0, {}},
+ {"mair_el1", MAIR_EL1, {}},
+ {"mair_el2", MAIR_EL2, {}},
+ {"mair_el3", MAIR_EL3, {}},
+ {"amair_el1", AMAIR_EL1, {}},
+ {"amair_el2", AMAIR_EL2, {}},
+ {"amair_el3", AMAIR_EL3, {}},
+ {"vbar_el1", VBAR_EL1, {}},
+ {"vbar_el2", VBAR_EL2, {}},
+ {"vbar_el3", VBAR_EL3, {}},
+ {"rmr_el1", RMR_EL1, {}},
+ {"rmr_el2", RMR_EL2, {}},
+ {"rmr_el3", RMR_EL3, {}},
+ {"contextidr_el1", CONTEXTIDR_EL1, {}},
+ {"tpidr_el0", TPIDR_EL0, {}},
{}}, + {"tpidr_el3", TPIDR_EL3, {}}, + {"tpidrro_el0", TPIDRRO_EL0, {}}, + {"tpidr_el1", TPIDR_EL1, {}}, + {"cntfrq_el0", CNTFRQ_EL0, {}}, + {"cntvoff_el2", CNTVOFF_EL2, {}}, + {"cntkctl_el1", CNTKCTL_EL1, {}}, + {"cnthctl_el2", CNTHCTL_EL2, {}}, + {"cntp_tval_el0", CNTP_TVAL_EL0, {}}, + {"cnthp_tval_el2", CNTHP_TVAL_EL2, {}}, + {"cntps_tval_el1", CNTPS_TVAL_EL1, {}}, + {"cntp_ctl_el0", CNTP_CTL_EL0, {}}, + {"cnthp_ctl_el2", CNTHP_CTL_EL2, {}}, + {"cntps_ctl_el1", CNTPS_CTL_EL1, {}}, + {"cntp_cval_el0", CNTP_CVAL_EL0, {}}, + {"cnthp_cval_el2", CNTHP_CVAL_EL2, {}}, + {"cntps_cval_el1", CNTPS_CVAL_EL1, {}}, + {"cntv_tval_el0", CNTV_TVAL_EL0, {}}, + {"cntv_ctl_el0", CNTV_CTL_EL0, {}}, + {"cntv_cval_el0", CNTV_CVAL_EL0, {}}, + {"pmevcntr0_el0", PMEVCNTR0_EL0, {}}, + {"pmevcntr1_el0", PMEVCNTR1_EL0, {}}, + {"pmevcntr2_el0", PMEVCNTR2_EL0, {}}, + {"pmevcntr3_el0", PMEVCNTR3_EL0, {}}, + {"pmevcntr4_el0", PMEVCNTR4_EL0, {}}, + {"pmevcntr5_el0", PMEVCNTR5_EL0, {}}, + {"pmevcntr6_el0", PMEVCNTR6_EL0, {}}, + {"pmevcntr7_el0", PMEVCNTR7_EL0, {}}, + {"pmevcntr8_el0", PMEVCNTR8_EL0, {}}, + {"pmevcntr9_el0", PMEVCNTR9_EL0, {}}, + {"pmevcntr10_el0", PMEVCNTR10_EL0, {}}, + {"pmevcntr11_el0", PMEVCNTR11_EL0, {}}, + {"pmevcntr12_el0", PMEVCNTR12_EL0, {}}, + {"pmevcntr13_el0", PMEVCNTR13_EL0, {}}, + {"pmevcntr14_el0", PMEVCNTR14_EL0, {}}, + {"pmevcntr15_el0", PMEVCNTR15_EL0, {}}, + {"pmevcntr16_el0", PMEVCNTR16_EL0, {}}, + {"pmevcntr17_el0", PMEVCNTR17_EL0, {}}, + {"pmevcntr18_el0", PMEVCNTR18_EL0, {}}, + {"pmevcntr19_el0", PMEVCNTR19_EL0, {}}, + {"pmevcntr20_el0", PMEVCNTR20_EL0, {}}, + {"pmevcntr21_el0", PMEVCNTR21_EL0, {}}, + {"pmevcntr22_el0", PMEVCNTR22_EL0, {}}, + {"pmevcntr23_el0", PMEVCNTR23_EL0, {}}, + {"pmevcntr24_el0", PMEVCNTR24_EL0, {}}, + {"pmevcntr25_el0", PMEVCNTR25_EL0, {}}, + {"pmevcntr26_el0", PMEVCNTR26_EL0, {}}, + {"pmevcntr27_el0", PMEVCNTR27_EL0, {}}, + {"pmevcntr28_el0", PMEVCNTR28_EL0, {}}, + {"pmevcntr29_el0", PMEVCNTR29_EL0, {}}, + {"pmevcntr30_el0", PMEVCNTR30_EL0, {}}, + {"pmccfiltr_el0", PMCCFILTR_EL0, {}}, + {"pmevtyper0_el0", PMEVTYPER0_EL0, {}}, + {"pmevtyper1_el0", PMEVTYPER1_EL0, {}}, + {"pmevtyper2_el0", PMEVTYPER2_EL0, {}}, + {"pmevtyper3_el0", PMEVTYPER3_EL0, {}}, + {"pmevtyper4_el0", PMEVTYPER4_EL0, {}}, + {"pmevtyper5_el0", PMEVTYPER5_EL0, {}}, + {"pmevtyper6_el0", PMEVTYPER6_EL0, {}}, + {"pmevtyper7_el0", PMEVTYPER7_EL0, {}}, + {"pmevtyper8_el0", PMEVTYPER8_EL0, {}}, + {"pmevtyper9_el0", PMEVTYPER9_EL0, {}}, + {"pmevtyper10_el0", PMEVTYPER10_EL0, {}}, + {"pmevtyper11_el0", PMEVTYPER11_EL0, {}}, + {"pmevtyper12_el0", PMEVTYPER12_EL0, {}}, + {"pmevtyper13_el0", PMEVTYPER13_EL0, {}}, + {"pmevtyper14_el0", PMEVTYPER14_EL0, {}}, + {"pmevtyper15_el0", PMEVTYPER15_EL0, {}}, + {"pmevtyper16_el0", PMEVTYPER16_EL0, {}}, + {"pmevtyper17_el0", PMEVTYPER17_EL0, {}}, + {"pmevtyper18_el0", PMEVTYPER18_EL0, {}}, + {"pmevtyper19_el0", PMEVTYPER19_EL0, {}}, + {"pmevtyper20_el0", PMEVTYPER20_EL0, {}}, + {"pmevtyper21_el0", PMEVTYPER21_EL0, {}}, + {"pmevtyper22_el0", PMEVTYPER22_EL0, {}}, + {"pmevtyper23_el0", PMEVTYPER23_EL0, {}}, + {"pmevtyper24_el0", PMEVTYPER24_EL0, {}}, + {"pmevtyper25_el0", PMEVTYPER25_EL0, {}}, + {"pmevtyper26_el0", PMEVTYPER26_EL0, {}}, + {"pmevtyper27_el0", PMEVTYPER27_EL0, {}}, + {"pmevtyper28_el0", PMEVTYPER28_EL0, {}}, + {"pmevtyper29_el0", PMEVTYPER29_EL0, {}}, + {"pmevtyper30_el0", PMEVTYPER30_EL0, {}}, // Trace registers - {"trcprgctlr", TRCPRGCTLR}, - {"trcprocselr", TRCPROCSELR}, - {"trcconfigr", TRCCONFIGR}, - {"trcauxctlr", TRCAUXCTLR}, - 
{"trceventctl0r", TRCEVENTCTL0R}, - {"trceventctl1r", TRCEVENTCTL1R}, - {"trcstallctlr", TRCSTALLCTLR}, - {"trctsctlr", TRCTSCTLR}, - {"trcsyncpr", TRCSYNCPR}, - {"trcccctlr", TRCCCCTLR}, - {"trcbbctlr", TRCBBCTLR}, - {"trctraceidr", TRCTRACEIDR}, - {"trcqctlr", TRCQCTLR}, - {"trcvictlr", TRCVICTLR}, - {"trcviiectlr", TRCVIIECTLR}, - {"trcvissctlr", TRCVISSCTLR}, - {"trcvipcssctlr", TRCVIPCSSCTLR}, - {"trcvdctlr", TRCVDCTLR}, - {"trcvdsacctlr", TRCVDSACCTLR}, - {"trcvdarcctlr", TRCVDARCCTLR}, - {"trcseqevr0", TRCSEQEVR0}, - {"trcseqevr1", TRCSEQEVR1}, - {"trcseqevr2", TRCSEQEVR2}, - {"trcseqrstevr", TRCSEQRSTEVR}, - {"trcseqstr", TRCSEQSTR}, - {"trcextinselr", TRCEXTINSELR}, - {"trccntrldvr0", TRCCNTRLDVR0}, - {"trccntrldvr1", TRCCNTRLDVR1}, - {"trccntrldvr2", TRCCNTRLDVR2}, - {"trccntrldvr3", TRCCNTRLDVR3}, - {"trccntctlr0", TRCCNTCTLR0}, - {"trccntctlr1", TRCCNTCTLR1}, - {"trccntctlr2", TRCCNTCTLR2}, - {"trccntctlr3", TRCCNTCTLR3}, - {"trccntvr0", TRCCNTVR0}, - {"trccntvr1", TRCCNTVR1}, - {"trccntvr2", TRCCNTVR2}, - {"trccntvr3", TRCCNTVR3}, - {"trcimspec0", TRCIMSPEC0}, - {"trcimspec1", TRCIMSPEC1}, - {"trcimspec2", TRCIMSPEC2}, - {"trcimspec3", TRCIMSPEC3}, - {"trcimspec4", TRCIMSPEC4}, - {"trcimspec5", TRCIMSPEC5}, - {"trcimspec6", TRCIMSPEC6}, - {"trcimspec7", TRCIMSPEC7}, - {"trcrsctlr2", TRCRSCTLR2}, - {"trcrsctlr3", TRCRSCTLR3}, - {"trcrsctlr4", TRCRSCTLR4}, - {"trcrsctlr5", TRCRSCTLR5}, - {"trcrsctlr6", TRCRSCTLR6}, - {"trcrsctlr7", TRCRSCTLR7}, - {"trcrsctlr8", TRCRSCTLR8}, - {"trcrsctlr9", TRCRSCTLR9}, - {"trcrsctlr10", TRCRSCTLR10}, - {"trcrsctlr11", TRCRSCTLR11}, - {"trcrsctlr12", TRCRSCTLR12}, - {"trcrsctlr13", TRCRSCTLR13}, - {"trcrsctlr14", TRCRSCTLR14}, - {"trcrsctlr15", TRCRSCTLR15}, - {"trcrsctlr16", TRCRSCTLR16}, - {"trcrsctlr17", TRCRSCTLR17}, - {"trcrsctlr18", TRCRSCTLR18}, - {"trcrsctlr19", TRCRSCTLR19}, - {"trcrsctlr20", TRCRSCTLR20}, - {"trcrsctlr21", TRCRSCTLR21}, - {"trcrsctlr22", TRCRSCTLR22}, - {"trcrsctlr23", TRCRSCTLR23}, - {"trcrsctlr24", TRCRSCTLR24}, - {"trcrsctlr25", TRCRSCTLR25}, - {"trcrsctlr26", TRCRSCTLR26}, - {"trcrsctlr27", TRCRSCTLR27}, - {"trcrsctlr28", TRCRSCTLR28}, - {"trcrsctlr29", TRCRSCTLR29}, - {"trcrsctlr30", TRCRSCTLR30}, - {"trcrsctlr31", TRCRSCTLR31}, - {"trcssccr0", TRCSSCCR0}, - {"trcssccr1", TRCSSCCR1}, - {"trcssccr2", TRCSSCCR2}, - {"trcssccr3", TRCSSCCR3}, - {"trcssccr4", TRCSSCCR4}, - {"trcssccr5", TRCSSCCR5}, - {"trcssccr6", TRCSSCCR6}, - {"trcssccr7", TRCSSCCR7}, - {"trcsscsr0", TRCSSCSR0}, - {"trcsscsr1", TRCSSCSR1}, - {"trcsscsr2", TRCSSCSR2}, - {"trcsscsr3", TRCSSCSR3}, - {"trcsscsr4", TRCSSCSR4}, - {"trcsscsr5", TRCSSCSR5}, - {"trcsscsr6", TRCSSCSR6}, - {"trcsscsr7", TRCSSCSR7}, - {"trcsspcicr0", TRCSSPCICR0}, - {"trcsspcicr1", TRCSSPCICR1}, - {"trcsspcicr2", TRCSSPCICR2}, - {"trcsspcicr3", TRCSSPCICR3}, - {"trcsspcicr4", TRCSSPCICR4}, - {"trcsspcicr5", TRCSSPCICR5}, - {"trcsspcicr6", TRCSSPCICR6}, - {"trcsspcicr7", TRCSSPCICR7}, - {"trcpdcr", TRCPDCR}, - {"trcacvr0", TRCACVR0}, - {"trcacvr1", TRCACVR1}, - {"trcacvr2", TRCACVR2}, - {"trcacvr3", TRCACVR3}, - {"trcacvr4", TRCACVR4}, - {"trcacvr5", TRCACVR5}, - {"trcacvr6", TRCACVR6}, - {"trcacvr7", TRCACVR7}, - {"trcacvr8", TRCACVR8}, - {"trcacvr9", TRCACVR9}, - {"trcacvr10", TRCACVR10}, - {"trcacvr11", TRCACVR11}, - {"trcacvr12", TRCACVR12}, - {"trcacvr13", TRCACVR13}, - {"trcacvr14", TRCACVR14}, - {"trcacvr15", TRCACVR15}, - {"trcacatr0", TRCACATR0}, - {"trcacatr1", TRCACATR1}, - {"trcacatr2", TRCACATR2}, - {"trcacatr3", TRCACATR3}, - {"trcacatr4", TRCACATR4}, - 
{"trcacatr5", TRCACATR5}, - {"trcacatr6", TRCACATR6}, - {"trcacatr7", TRCACATR7}, - {"trcacatr8", TRCACATR8}, - {"trcacatr9", TRCACATR9}, - {"trcacatr10", TRCACATR10}, - {"trcacatr11", TRCACATR11}, - {"trcacatr12", TRCACATR12}, - {"trcacatr13", TRCACATR13}, - {"trcacatr14", TRCACATR14}, - {"trcacatr15", TRCACATR15}, - {"trcdvcvr0", TRCDVCVR0}, - {"trcdvcvr1", TRCDVCVR1}, - {"trcdvcvr2", TRCDVCVR2}, - {"trcdvcvr3", TRCDVCVR3}, - {"trcdvcvr4", TRCDVCVR4}, - {"trcdvcvr5", TRCDVCVR5}, - {"trcdvcvr6", TRCDVCVR6}, - {"trcdvcvr7", TRCDVCVR7}, - {"trcdvcmr0", TRCDVCMR0}, - {"trcdvcmr1", TRCDVCMR1}, - {"trcdvcmr2", TRCDVCMR2}, - {"trcdvcmr3", TRCDVCMR3}, - {"trcdvcmr4", TRCDVCMR4}, - {"trcdvcmr5", TRCDVCMR5}, - {"trcdvcmr6", TRCDVCMR6}, - {"trcdvcmr7", TRCDVCMR7}, - {"trccidcvr0", TRCCIDCVR0}, - {"trccidcvr1", TRCCIDCVR1}, - {"trccidcvr2", TRCCIDCVR2}, - {"trccidcvr3", TRCCIDCVR3}, - {"trccidcvr4", TRCCIDCVR4}, - {"trccidcvr5", TRCCIDCVR5}, - {"trccidcvr6", TRCCIDCVR6}, - {"trccidcvr7", TRCCIDCVR7}, - {"trcvmidcvr0", TRCVMIDCVR0}, - {"trcvmidcvr1", TRCVMIDCVR1}, - {"trcvmidcvr2", TRCVMIDCVR2}, - {"trcvmidcvr3", TRCVMIDCVR3}, - {"trcvmidcvr4", TRCVMIDCVR4}, - {"trcvmidcvr5", TRCVMIDCVR5}, - {"trcvmidcvr6", TRCVMIDCVR6}, - {"trcvmidcvr7", TRCVMIDCVR7}, - {"trccidcctlr0", TRCCIDCCTLR0}, - {"trccidcctlr1", TRCCIDCCTLR1}, - {"trcvmidcctlr0", TRCVMIDCCTLR0}, - {"trcvmidcctlr1", TRCVMIDCCTLR1}, - {"trcitctrl", TRCITCTRL}, - {"trcclaimset", TRCCLAIMSET}, - {"trcclaimclr", TRCCLAIMCLR}, + {"trcprgctlr", TRCPRGCTLR, {}}, + {"trcprocselr", TRCPROCSELR, {}}, + {"trcconfigr", TRCCONFIGR, {}}, + {"trcauxctlr", TRCAUXCTLR, {}}, + {"trceventctl0r", TRCEVENTCTL0R, {}}, + {"trceventctl1r", TRCEVENTCTL1R, {}}, + {"trcstallctlr", TRCSTALLCTLR, {}}, + {"trctsctlr", TRCTSCTLR, {}}, + {"trcsyncpr", TRCSYNCPR, {}}, + {"trcccctlr", TRCCCCTLR, {}}, + {"trcbbctlr", TRCBBCTLR, {}}, + {"trctraceidr", TRCTRACEIDR, {}}, + {"trcqctlr", TRCQCTLR, {}}, + {"trcvictlr", TRCVICTLR, {}}, + {"trcviiectlr", TRCVIIECTLR, {}}, + {"trcvissctlr", TRCVISSCTLR, {}}, + {"trcvipcssctlr", TRCVIPCSSCTLR, {}}, + {"trcvdctlr", TRCVDCTLR, {}}, + {"trcvdsacctlr", TRCVDSACCTLR, {}}, + {"trcvdarcctlr", TRCVDARCCTLR, {}}, + {"trcseqevr0", TRCSEQEVR0, {}}, + {"trcseqevr1", TRCSEQEVR1, {}}, + {"trcseqevr2", TRCSEQEVR2, {}}, + {"trcseqrstevr", TRCSEQRSTEVR, {}}, + {"trcseqstr", TRCSEQSTR, {}}, + {"trcextinselr", TRCEXTINSELR, {}}, + {"trccntrldvr0", TRCCNTRLDVR0, {}}, + {"trccntrldvr1", TRCCNTRLDVR1, {}}, + {"trccntrldvr2", TRCCNTRLDVR2, {}}, + {"trccntrldvr3", TRCCNTRLDVR3, {}}, + {"trccntctlr0", TRCCNTCTLR0, {}}, + {"trccntctlr1", TRCCNTCTLR1, {}}, + {"trccntctlr2", TRCCNTCTLR2, {}}, + {"trccntctlr3", TRCCNTCTLR3, {}}, + {"trccntvr0", TRCCNTVR0, {}}, + {"trccntvr1", TRCCNTVR1, {}}, + {"trccntvr2", TRCCNTVR2, {}}, + {"trccntvr3", TRCCNTVR3, {}}, + {"trcimspec0", TRCIMSPEC0, {}}, + {"trcimspec1", TRCIMSPEC1, {}}, + {"trcimspec2", TRCIMSPEC2, {}}, + {"trcimspec3", TRCIMSPEC3, {}}, + {"trcimspec4", TRCIMSPEC4, {}}, + {"trcimspec5", TRCIMSPEC5, {}}, + {"trcimspec6", TRCIMSPEC6, {}}, + {"trcimspec7", TRCIMSPEC7, {}}, + {"trcrsctlr2", TRCRSCTLR2, {}}, + {"trcrsctlr3", TRCRSCTLR3, {}}, + {"trcrsctlr4", TRCRSCTLR4, {}}, + {"trcrsctlr5", TRCRSCTLR5, {}}, + {"trcrsctlr6", TRCRSCTLR6, {}}, + {"trcrsctlr7", TRCRSCTLR7, {}}, + {"trcrsctlr8", TRCRSCTLR8, {}}, + {"trcrsctlr9", TRCRSCTLR9, {}}, + {"trcrsctlr10", TRCRSCTLR10, {}}, + {"trcrsctlr11", TRCRSCTLR11, {}}, + {"trcrsctlr12", TRCRSCTLR12, {}}, + {"trcrsctlr13", TRCRSCTLR13, {}}, + {"trcrsctlr14", TRCRSCTLR14, {}}, 
+ {"trcrsctlr15", TRCRSCTLR15, {}}, + {"trcrsctlr16", TRCRSCTLR16, {}}, + {"trcrsctlr17", TRCRSCTLR17, {}}, + {"trcrsctlr18", TRCRSCTLR18, {}}, + {"trcrsctlr19", TRCRSCTLR19, {}}, + {"trcrsctlr20", TRCRSCTLR20, {}}, + {"trcrsctlr21", TRCRSCTLR21, {}}, + {"trcrsctlr22", TRCRSCTLR22, {}}, + {"trcrsctlr23", TRCRSCTLR23, {}}, + {"trcrsctlr24", TRCRSCTLR24, {}}, + {"trcrsctlr25", TRCRSCTLR25, {}}, + {"trcrsctlr26", TRCRSCTLR26, {}}, + {"trcrsctlr27", TRCRSCTLR27, {}}, + {"trcrsctlr28", TRCRSCTLR28, {}}, + {"trcrsctlr29", TRCRSCTLR29, {}}, + {"trcrsctlr30", TRCRSCTLR30, {}}, + {"trcrsctlr31", TRCRSCTLR31, {}}, + {"trcssccr0", TRCSSCCR0, {}}, + {"trcssccr1", TRCSSCCR1, {}}, + {"trcssccr2", TRCSSCCR2, {}}, + {"trcssccr3", TRCSSCCR3, {}}, + {"trcssccr4", TRCSSCCR4, {}}, + {"trcssccr5", TRCSSCCR5, {}}, + {"trcssccr6", TRCSSCCR6, {}}, + {"trcssccr7", TRCSSCCR7, {}}, + {"trcsscsr0", TRCSSCSR0, {}}, + {"trcsscsr1", TRCSSCSR1, {}}, + {"trcsscsr2", TRCSSCSR2, {}}, + {"trcsscsr3", TRCSSCSR3, {}}, + {"trcsscsr4", TRCSSCSR4, {}}, + {"trcsscsr5", TRCSSCSR5, {}}, + {"trcsscsr6", TRCSSCSR6, {}}, + {"trcsscsr7", TRCSSCSR7, {}}, + {"trcsspcicr0", TRCSSPCICR0, {}}, + {"trcsspcicr1", TRCSSPCICR1, {}}, + {"trcsspcicr2", TRCSSPCICR2, {}}, + {"trcsspcicr3", TRCSSPCICR3, {}}, + {"trcsspcicr4", TRCSSPCICR4, {}}, + {"trcsspcicr5", TRCSSPCICR5, {}}, + {"trcsspcicr6", TRCSSPCICR6, {}}, + {"trcsspcicr7", TRCSSPCICR7, {}}, + {"trcpdcr", TRCPDCR, {}}, + {"trcacvr0", TRCACVR0, {}}, + {"trcacvr1", TRCACVR1, {}}, + {"trcacvr2", TRCACVR2, {}}, + {"trcacvr3", TRCACVR3, {}}, + {"trcacvr4", TRCACVR4, {}}, + {"trcacvr5", TRCACVR5, {}}, + {"trcacvr6", TRCACVR6, {}}, + {"trcacvr7", TRCACVR7, {}}, + {"trcacvr8", TRCACVR8, {}}, + {"trcacvr9", TRCACVR9, {}}, + {"trcacvr10", TRCACVR10, {}}, + {"trcacvr11", TRCACVR11, {}}, + {"trcacvr12", TRCACVR12, {}}, + {"trcacvr13", TRCACVR13, {}}, + {"trcacvr14", TRCACVR14, {}}, + {"trcacvr15", TRCACVR15, {}}, + {"trcacatr0", TRCACATR0, {}}, + {"trcacatr1", TRCACATR1, {}}, + {"trcacatr2", TRCACATR2, {}}, + {"trcacatr3", TRCACATR3, {}}, + {"trcacatr4", TRCACATR4, {}}, + {"trcacatr5", TRCACATR5, {}}, + {"trcacatr6", TRCACATR6, {}}, + {"trcacatr7", TRCACATR7, {}}, + {"trcacatr8", TRCACATR8, {}}, + {"trcacatr9", TRCACATR9, {}}, + {"trcacatr10", TRCACATR10, {}}, + {"trcacatr11", TRCACATR11, {}}, + {"trcacatr12", TRCACATR12, {}}, + {"trcacatr13", TRCACATR13, {}}, + {"trcacatr14", TRCACATR14, {}}, + {"trcacatr15", TRCACATR15, {}}, + {"trcdvcvr0", TRCDVCVR0, {}}, + {"trcdvcvr1", TRCDVCVR1, {}}, + {"trcdvcvr2", TRCDVCVR2, {}}, + {"trcdvcvr3", TRCDVCVR3, {}}, + {"trcdvcvr4", TRCDVCVR4, {}}, + {"trcdvcvr5", TRCDVCVR5, {}}, + {"trcdvcvr6", TRCDVCVR6, {}}, + {"trcdvcvr7", TRCDVCVR7, {}}, + {"trcdvcmr0", TRCDVCMR0, {}}, + {"trcdvcmr1", TRCDVCMR1, {}}, + {"trcdvcmr2", TRCDVCMR2, {}}, + {"trcdvcmr3", TRCDVCMR3, {}}, + {"trcdvcmr4", TRCDVCMR4, {}}, + {"trcdvcmr5", TRCDVCMR5, {}}, + {"trcdvcmr6", TRCDVCMR6, {}}, + {"trcdvcmr7", TRCDVCMR7, {}}, + {"trccidcvr0", TRCCIDCVR0, {}}, + {"trccidcvr1", TRCCIDCVR1, {}}, + {"trccidcvr2", TRCCIDCVR2, {}}, + {"trccidcvr3", TRCCIDCVR3, {}}, + {"trccidcvr4", TRCCIDCVR4, {}}, + {"trccidcvr5", TRCCIDCVR5, {}}, + {"trccidcvr6", TRCCIDCVR6, {}}, + {"trccidcvr7", TRCCIDCVR7, {}}, + {"trcvmidcvr0", TRCVMIDCVR0, {}}, + {"trcvmidcvr1", TRCVMIDCVR1, {}}, + {"trcvmidcvr2", TRCVMIDCVR2, {}}, + {"trcvmidcvr3", TRCVMIDCVR3, {}}, + {"trcvmidcvr4", TRCVMIDCVR4, {}}, + {"trcvmidcvr5", TRCVMIDCVR5, {}}, + {"trcvmidcvr6", TRCVMIDCVR6, {}}, + {"trcvmidcvr7", TRCVMIDCVR7, {}}, + {"trccidcctlr0", 
+ {"trccidcctlr0", TRCCIDCCTLR0, {}},
+ {"trccidcctlr1", TRCCIDCCTLR1, {}},
+ {"trcvmidcctlr0", TRCVMIDCCTLR0, {}},
+ {"trcvmidcctlr1", TRCVMIDCCTLR1, {}},
+ {"trcitctrl", TRCITCTRL, {}},
+ {"trcclaimset", TRCCLAIMSET, {}},
+ {"trcclaimclr", TRCCLAIMCLR, {}},
  // GICv3 registers
- {"icc_bpr1_el1", ICC_BPR1_EL1},
- {"icc_bpr0_el1", ICC_BPR0_EL1},
- {"icc_pmr_el1", ICC_PMR_EL1},
- {"icc_ctlr_el1", ICC_CTLR_EL1},
- {"icc_ctlr_el3", ICC_CTLR_EL3},
- {"icc_sre_el1", ICC_SRE_EL1},
- {"icc_sre_el2", ICC_SRE_EL2},
- {"icc_sre_el3", ICC_SRE_EL3},
- {"icc_igrpen0_el1", ICC_IGRPEN0_EL1},
- {"icc_igrpen1_el1", ICC_IGRPEN1_EL1},
- {"icc_igrpen1_el3", ICC_IGRPEN1_EL3},
- {"icc_seien_el1", ICC_SEIEN_EL1},
- {"icc_ap0r0_el1", ICC_AP0R0_EL1},
- {"icc_ap0r1_el1", ICC_AP0R1_EL1},
- {"icc_ap0r2_el1", ICC_AP0R2_EL1},
- {"icc_ap0r3_el1", ICC_AP0R3_EL1},
- {"icc_ap1r0_el1", ICC_AP1R0_EL1},
- {"icc_ap1r1_el1", ICC_AP1R1_EL1},
- {"icc_ap1r2_el1", ICC_AP1R2_EL1},
- {"icc_ap1r3_el1", ICC_AP1R3_EL1},
- {"ich_ap0r0_el2", ICH_AP0R0_EL2},
- {"ich_ap0r1_el2", ICH_AP0R1_EL2},
- {"ich_ap0r2_el2", ICH_AP0R2_EL2},
- {"ich_ap0r3_el2", ICH_AP0R3_EL2},
- {"ich_ap1r0_el2", ICH_AP1R0_EL2},
- {"ich_ap1r1_el2", ICH_AP1R1_EL2},
- {"ich_ap1r2_el2", ICH_AP1R2_EL2},
- {"ich_ap1r3_el2", ICH_AP1R3_EL2},
- {"ich_hcr_el2", ICH_HCR_EL2},
- {"ich_misr_el2", ICH_MISR_EL2},
- {"ich_vmcr_el2", ICH_VMCR_EL2},
- {"ich_vseir_el2", ICH_VSEIR_EL2},
- {"ich_lr0_el2", ICH_LR0_EL2},
- {"ich_lr1_el2", ICH_LR1_EL2},
- {"ich_lr2_el2", ICH_LR2_EL2},
- {"ich_lr3_el2", ICH_LR3_EL2},
- {"ich_lr4_el2", ICH_LR4_EL2},
- {"ich_lr5_el2", ICH_LR5_EL2},
- {"ich_lr6_el2", ICH_LR6_EL2},
- {"ich_lr7_el2", ICH_LR7_EL2},
- {"ich_lr8_el2", ICH_LR8_EL2},
- {"ich_lr9_el2", ICH_LR9_EL2},
- {"ich_lr10_el2", ICH_LR10_EL2},
- {"ich_lr11_el2", ICH_LR11_EL2},
- {"ich_lr12_el2", ICH_LR12_EL2},
- {"ich_lr13_el2", ICH_LR13_EL2},
- {"ich_lr14_el2", ICH_LR14_EL2},
- {"ich_lr15_el2", ICH_LR15_EL2}
-};
-
-const AArch64NamedImmMapper::Mapping
-AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = {
- {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3}
+ {"icc_bpr1_el1", ICC_BPR1_EL1, {}},
+ {"icc_bpr0_el1", ICC_BPR0_EL1, {}},
+ {"icc_pmr_el1", ICC_PMR_EL1, {}},
+ {"icc_ctlr_el1", ICC_CTLR_EL1, {}},
+ {"icc_ctlr_el3", ICC_CTLR_EL3, {}},
+ {"icc_sre_el1", ICC_SRE_EL1, {}},
+ {"icc_sre_el2", ICC_SRE_EL2, {}},
+ {"icc_sre_el3", ICC_SRE_EL3, {}},
+ {"icc_igrpen0_el1", ICC_IGRPEN0_EL1, {}},
+ {"icc_igrpen1_el1", ICC_IGRPEN1_EL1, {}},
+ {"icc_igrpen1_el3", ICC_IGRPEN1_EL3, {}},
+ {"icc_seien_el1", ICC_SEIEN_EL1, {}},
+ {"icc_ap0r0_el1", ICC_AP0R0_EL1, {}},
+ {"icc_ap0r1_el1", ICC_AP0R1_EL1, {}},
+ {"icc_ap0r2_el1", ICC_AP0R2_EL1, {}},
+ {"icc_ap0r3_el1", ICC_AP0R3_EL1, {}},
+ {"icc_ap1r0_el1", ICC_AP1R0_EL1, {}},
+ {"icc_ap1r1_el1", ICC_AP1R1_EL1, {}},
+ {"icc_ap1r2_el1", ICC_AP1R2_EL1, {}},
+ {"icc_ap1r3_el1", ICC_AP1R3_EL1, {}},
+ {"ich_ap0r0_el2", ICH_AP0R0_EL2, {}},
+ {"ich_ap0r1_el2", ICH_AP0R1_EL2, {}},
+ {"ich_ap0r2_el2", ICH_AP0R2_EL2, {}},
+ {"ich_ap0r3_el2", ICH_AP0R3_EL2, {}},
+ {"ich_ap1r0_el2", ICH_AP1R0_EL2, {}},
+ {"ich_ap1r1_el2", ICH_AP1R1_EL2, {}},
+ {"ich_ap1r2_el2", ICH_AP1R2_EL2, {}},
+ {"ich_ap1r3_el2", ICH_AP1R3_EL2, {}},
+ {"ich_hcr_el2", ICH_HCR_EL2, {}},
+ {"ich_misr_el2", ICH_MISR_EL2, {}},
+ {"ich_vmcr_el2", ICH_VMCR_EL2, {}},
+ {"ich_vseir_el2", ICH_VSEIR_EL2, {}},
+ {"ich_lr0_el2", ICH_LR0_EL2, {}},
+ {"ich_lr1_el2", ICH_LR1_EL2, {}},
+ {"ich_lr2_el2", ICH_LR2_EL2, {}},
+ {"ich_lr3_el2", ICH_LR3_EL2, {}},
+ {"ich_lr4_el2", ICH_LR4_EL2, {}},
+ {"ich_lr5_el2", ICH_LR5_EL2, {}},
+ {"ich_lr6_el2", ICH_LR6_EL2, {}},
+ {"ich_lr7_el2", ICH_LR7_EL2, {}},
+ {"ich_lr8_el2", ICH_LR8_EL2, {}},
+ {"ich_lr9_el2", ICH_LR9_EL2, {}},
+ {"ich_lr10_el2", ICH_LR10_EL2, {}},
+ {"ich_lr11_el2", ICH_LR11_EL2, {}},
+ {"ich_lr12_el2", ICH_LR12_EL2, {}},
+ {"ich_lr13_el2", ICH_LR13_EL2, {}},
+ {"ich_lr14_el2", ICH_LR14_EL2, {}},
+ {"ich_lr15_el2", ICH_LR15_EL2, {}},
+
+ // Cyclone registers
+ {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3, {AArch64::ProcCyclone}},
+
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ {"pan", PAN, {AArch64::HasV8_1aOps}},
+
+ // v8.1a "Limited Ordering Regions" extension-specific system registers
+ {"lorsa_el1", LORSA_EL1, {AArch64::HasV8_1aOps}},
+ {"lorea_el1", LOREA_EL1, {AArch64::HasV8_1aOps}},
+ {"lorn_el1", LORN_EL1, {AArch64::HasV8_1aOps}},
+ {"lorc_el1", LORC_EL1, {AArch64::HasV8_1aOps}},
+
+ // v8.1a "Virtualization host extensions" system registers
+ {"ttbr1_el2", TTBR1_EL2, {AArch64::HasV8_1aOps}},
+ {"contextidr_el2", CONTEXTIDR_EL2, {AArch64::HasV8_1aOps}},
+ {"cnthv_tval_el2", CNTHV_TVAL_EL2, {AArch64::HasV8_1aOps}},
+ {"cnthv_cval_el2", CNTHV_CVAL_EL2, {AArch64::HasV8_1aOps}},
+ {"cnthv_ctl_el2", CNTHV_CTL_EL2, {AArch64::HasV8_1aOps}},
+ {"sctlr_el12", SCTLR_EL12, {AArch64::HasV8_1aOps}},
+ {"cpacr_el12", CPACR_EL12, {AArch64::HasV8_1aOps}},
+ {"ttbr0_el12", TTBR0_EL12, {AArch64::HasV8_1aOps}},
+ {"ttbr1_el12", TTBR1_EL12, {AArch64::HasV8_1aOps}},
+ {"tcr_el12", TCR_EL12, {AArch64::HasV8_1aOps}},
+ {"afsr0_el12", AFSR0_EL12, {AArch64::HasV8_1aOps}},
+ {"afsr1_el12", AFSR1_EL12, {AArch64::HasV8_1aOps}},
+ {"esr_el12", ESR_EL12, {AArch64::HasV8_1aOps}},
+ {"far_el12", FAR_EL12, {AArch64::HasV8_1aOps}},
+ {"mair_el12", MAIR_EL12, {AArch64::HasV8_1aOps}},
+ {"amair_el12", AMAIR_EL12, {AArch64::HasV8_1aOps}},
+ {"vbar_el12", VBAR_EL12, {AArch64::HasV8_1aOps}},
+ {"contextidr_el12", CONTEXTIDR_EL12, {AArch64::HasV8_1aOps}},
+ {"cntkctl_el12", CNTKCTL_EL12, {AArch64::HasV8_1aOps}},
+ {"cntp_tval_el02", CNTP_TVAL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntp_ctl_el02", CNTP_CTL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntp_cval_el02", CNTP_CVAL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntv_tval_el02", CNTV_TVAL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntv_ctl_el02", CNTV_CTL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntv_cval_el02", CNTV_CVAL_EL02, {AArch64::HasV8_1aOps}},
+ {"spsr_el12", SPSR_EL12, {AArch64::HasV8_1aOps}},
+ {"elr_el12", ELR_EL12, {AArch64::HasV8_1aOps}},
};

uint32_t
-AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
+AArch64SysReg::SysRegMapper::fromString(StringRef Name,
+                                        const FeatureBitset& FeatureBits, bool &Valid) const {
  std::string NameLower = Name.lower();

  // First search the registers shared by all
- for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
-   if (SysRegPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
+   if (SysRegMappings[i].isNameEqual(NameLower, FeatureBits)) {
      Valid = true;
-     return SysRegPairs[i].Value;
-   }
- }
-
- // Next search for target specific registers
- if (FeatureBits & AArch64::ProcCyclone) {
-   for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
-     if (CycloneSysRegPairs[i].Name == NameLower) {
-       Valid = true;
-       return CycloneSysRegPairs[i].Value;
-     }
+     return SysRegMappings[i].Value;
    }
  }

  // Now try the instruction-specific registers (either read-only or
  // write-only).
- for (unsigned i = 0; i < NumInstPairs; ++i) {
-   if (InstPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < NumInstMappings; ++i) {
+   if (InstMappings[i].isNameEqual(NameLower, FeatureBits)) {
      Valid = true;
-     return InstPairs[i].Value;
+     return InstMappings[i].Value;
    }
  }
@@ -814,28 +850,20 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
}

std::string
-AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
+AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
+                                      const FeatureBitset& FeatureBits) const {
  // First search the registers shared by all
- for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
-   if (SysRegPairs[i].Value == Bits) {
-     return SysRegPairs[i].Name;
-   }
- }
-
- // Next search for target specific registers
- if (FeatureBits & AArch64::ProcCyclone) {
-   for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
-     if (CycloneSysRegPairs[i].Value == Bits) {
-       return CycloneSysRegPairs[i].Name;
-     }
+ for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
+   if (SysRegMappings[i].isValueEqual(Bits, FeatureBits)) {
+     return SysRegMappings[i].Name;
    }
  }

  // Now try the instruction-specific registers (either read-only or
  // write-only).
- for (unsigned i = 0; i < NumInstPairs; ++i) {
-   if (InstPairs[i].Value == Bits) {
-     return InstPairs[i].Name;
+ for (unsigned i = 0; i < NumInstMappings; ++i) {
+   if (InstMappings[i].isValueEqual(Bits, FeatureBits)) {
+     return InstMappings[i].Name;
    }
  }
@@ -850,40 +878,40 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
         + "_c" + utostr(CRm) + "_" + utostr(Op2);
}

-const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = {
- {"ipas2e1is", IPAS2E1IS},
- {"ipas2le1is", IPAS2LE1IS},
- {"vmalle1is", VMALLE1IS},
- {"alle2is", ALLE2IS},
- {"alle3is", ALLE3IS},
- {"vae1is", VAE1IS},
- {"vae2is", VAE2IS},
- {"vae3is", VAE3IS},
- {"aside1is", ASIDE1IS},
- {"vaae1is", VAAE1IS},
- {"alle1is", ALLE1IS},
- {"vale1is", VALE1IS},
- {"vale2is", VALE2IS},
- {"vale3is", VALE3IS},
- {"vmalls12e1is", VMALLS12E1IS},
- {"vaale1is", VAALE1IS},
- {"ipas2e1", IPAS2E1},
- {"ipas2le1", IPAS2LE1},
- {"vmalle1", VMALLE1},
- {"alle2", ALLE2},
- {"alle3", ALLE3},
- {"vae1", VAE1},
- {"vae2", VAE2},
- {"vae3", VAE3},
- {"aside1", ASIDE1},
- {"vaae1", VAAE1},
- {"alle1", ALLE1},
- {"vale1", VALE1},
- {"vale2", VALE2},
- {"vale3", VALE3},
- {"vmalls12e1", VMALLS12E1},
- {"vaale1", VAALE1}
+const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = {
+ {"ipas2e1is", IPAS2E1IS, {}},
+ {"ipas2le1is", IPAS2LE1IS, {}},
+ {"vmalle1is", VMALLE1IS, {}},
+ {"alle2is", ALLE2IS, {}},
+ {"alle3is", ALLE3IS, {}},
+ {"vae1is", VAE1IS, {}},
+ {"vae2is", VAE2IS, {}},
+ {"vae3is", VAE3IS, {}},
+ {"aside1is", ASIDE1IS, {}},
+ {"vaae1is", VAAE1IS, {}},
+ {"alle1is", ALLE1IS, {}},
+ {"vale1is", VALE1IS, {}},
+ {"vale2is", VALE2IS, {}},
+ {"vale3is", VALE3IS, {}},
+ {"vmalls12e1is", VMALLS12E1IS, {}},
+ {"vaale1is", VAALE1IS, {}},
+ {"ipas2e1", IPAS2E1, {}},
+ {"ipas2le1", IPAS2LE1, {}},
+ {"vmalle1", VMALLE1, {}},
+ {"alle2", ALLE2, {}},
+ {"alle3", ALLE3, {}},
+ {"vae1", VAE1, {}},
+ {"vae2", VAE2, {}},
+ {"vae3", VAE3, {}},
+ {"aside1", ASIDE1, {}},
+ {"vaae1", VAAE1, {}},
+ {"alle1", ALLE1, {}},
+ {"vale1", VALE1, {}},
+ {"vale2", VALE2, {}},
+ {"vale3", VALE3, {}},
+ {"vmalls12e1", VMALLS12E1, {}},
+ {"vaale1", VAALE1, {}}
};

AArch64TLBI::TLBIMapper::TLBIMapper()
-  : AArch64NamedImmMapper(TLBIPairs, 0) {}
+  : AArch64NamedImmMapper(TLBIMappings, 0) {}
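With the Cyclone side table gone and the constructors now parameterless, callers thread the subtarget's feature bits through each query rather than having them baked into the mapper at construction. A hypothetical caller-side sketch, where STI stands for an MCSubtargetInfo the caller already holds and emitMRS is a placeholder (neither is code from this patch):

bool Valid;
AArch64SysReg::MRSMapper Mapper;
uint32_t Encoding = Mapper.fromString("pan", STI.getFeatureBits(), Valid);
// Valid becomes true only when the bits include AArch64::HasV8_1aOps;
// on a plain v8 subtarget the same name fails to resolve.
if (Valid)
  emitMRS(Encoding);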
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 6d0337c..7125f14 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -22,6 +22,7 @@
 #include "MCTargetDesc/AArch64MCTargetDesc.h" // For AArch64::X0 and friends.
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/ErrorHandling.h"

 namespace llvm {
@@ -280,22 +281,45 @@ struct AArch64NamedImmMapper {
  struct Mapping {
    const char *Name;
    uint32_t Value;
+   // Set of features this mapping is available for
+   // Zero value of FeatureBitSet means the mapping is always available
+   FeatureBitset FeatureBitSet;
+
+   bool isNameEqual(std::string Other,
+                    const FeatureBitset& FeatureBits) const {
+     if (FeatureBitSet.any() &&
+         (FeatureBitSet & FeatureBits).none())
+       return false;
+     return Name == Other;
+   }
+
+   bool isValueEqual(uint32_t Other,
+                     const FeatureBitset& FeatureBits) const {
+     if (FeatureBitSet.any() &&
+         (FeatureBitSet & FeatureBits).none())
+       return false;
+     return Value == Other;
+   }
  };

  template<int N>
-  AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
-    : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
+  AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm)
+    : Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {}

-  StringRef toString(uint32_t Value, bool &Valid) const;
-  uint32_t fromString(StringRef Name, bool &Valid) const;
+  // Maps value to string, depending on availability for FeatureBits given
+  StringRef toString(uint32_t Value, const FeatureBitset& FeatureBits,
+                     bool &Valid) const;
+  // Maps string to value, depending on availability for FeatureBits given
+  uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
+                      bool &Valid) const;

  /// Many of the instructions allow an alternative assembly form consisting of
  /// a simple immediate. Currently the only valid forms are ranges [0, N) where
  /// N being 0 indicates no immediate syntax-form is allowed.
  bool validImm(uint32_t Value) const;

protected:
-  const Mapping *Pairs;
-  size_t NumPairs;
+  const Mapping *Mappings;
+  size_t NumMappings;
  uint32_t TooBigImm;
};

@@ -317,7 +341,7 @@ namespace AArch64AT {
  };

  struct ATMapper : AArch64NamedImmMapper {
-   const static Mapping ATPairs[];
+   const static Mapping ATMappings[];
    ATMapper();
  };

@@ -341,7 +365,7 @@ namespace AArch64DB {
  };

  struct DBarrierMapper : AArch64NamedImmMapper {
-   const static Mapping DBarrierPairs[];
+   const static Mapping DBarrierMappings[];
    DBarrierMapper();
  };

@@ -361,7 +385,7 @@ namespace AArch64DC {
  };

  struct DCMapper : AArch64NamedImmMapper {
-   const static Mapping DCPairs[];
+   const static Mapping DCMappings[];
    DCMapper();
  };

@@ -378,7 +402,7 @@ namespace AArch64IC {

  struct ICMapper : AArch64NamedImmMapper {
-   const static Mapping ICPairs[];
+   const static Mapping ICMappings[];
    ICMapper();
  };

@@ -394,7 +418,7 @@ namespace AArch64ISB {
    SY = 0xf
  };
  struct ISBMapper : AArch64NamedImmMapper {
-   const static Mapping ISBPairs[];
+   const static Mapping ISBMappings[];
    ISBMapper();
  };

@@ -424,7 +448,7 @@ namespace AArch64PRFM {
  };

  struct PRFMMapper : AArch64NamedImmMapper {
-   const static Mapping PRFMPairs[];
+   const static Mapping PRFMMappings[];
    PRFMMapper();
  };

@@ -435,11 +459,14 @@ namespace AArch64PState {
    Invalid = -1,
    SPSel = 0x05,
    DAIFSet = 0x1e,
-   DAIFClr = 0x1f
+   DAIFClr = 0x1f,
+
+   // v8.1a "Privileged Access Never" extension-specific PStates
+   PAN = 0x04,
  };

  struct PStateMapper : AArch64NamedImmMapper {
-   const static Mapping PStatePairs[];
+   const static Mapping PStateMappings[];
    PStateMapper();
  };
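The new PAN PState (0x04) is gated the same way in both directions, so the printing side of the MSR-immediate alias also depends on the subtarget. A sketch of that direction, again assuming an STI as above rather than code from this patch:

bool Valid;
AArch64PState::PStateMapper Mapper;
StringRef Name = Mapper.toString(AArch64PState::PAN, STI.getFeatureBits(), Valid);
// Valid == true (and Name == "pan") only if AArch64::HasV8_1aOps is set;
// otherwise the printer must fall back to the raw immediate form.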
@@ -1122,11 +1149,48 @@
    ICH_LR13_EL2      = 0xe66d, // 11  100  1100  1101  101
    ICH_LR14_EL2      = 0xe66e, // 11  100  1100  1101  110
    ICH_LR15_EL2      = 0xe66f, // 11  100  1100  1101  111
-  };
-
-  // Cyclone specific system registers
-  enum CycloneSysRegValues {
-    CPM_IOACC_CTL_EL3 = 0xff90
+
+    // v8.1a "Privileged Access Never" extension-specific system registers
+    PAN             = 0xc213, // 11  000  0100  0010  011
+
+    // v8.1a "Limited Ordering Regions" extension-specific system registers
+    LORSA_EL1       = 0xc520, // 11  000  1010  0100  000
+    LOREA_EL1       = 0xc521, // 11  000  1010  0100  001
+    LORN_EL1        = 0xc522, // 11  000  1010  0100  010
+    LORC_EL1        = 0xc523, // 11  000  1010  0100  011
+    LORID_EL1       = 0xc527, // 11  000  1010  0100  111
+
+    // v8.1a "Virtualization host extensions" system registers
+    TTBR1_EL2       = 0xe101, // 11  100  0010  0000  001
+    CONTEXTIDR_EL2  = 0xe681, // 11  100  1101  0000  001
+    CNTHV_TVAL_EL2  = 0xe718, // 11  100  1110  0011  000
+    CNTHV_CVAL_EL2  = 0xe71a, // 11  100  1110  0011  010
+    CNTHV_CTL_EL2   = 0xe719, // 11  100  1110  0011  001
+    SCTLR_EL12      = 0xe880, // 11  101  0001  0000  000
+    CPACR_EL12      = 0xe882, // 11  101  0001  0000  010
+    TTBR0_EL12      = 0xe900, // 11  101  0010  0000  000
+    TTBR1_EL12      = 0xe901, // 11  101  0010  0000  001
+    TCR_EL12        = 0xe902, // 11  101  0010  0000  010
+    AFSR0_EL12      = 0xea88, // 11  101  0101  0001  000
+    AFSR1_EL12      = 0xea89, // 11  101  0101  0001  001
+    ESR_EL12        = 0xea90, // 11  101  0101  0010  000
+    FAR_EL12        = 0xeb00, // 11  101  0110  0000  000
+    MAIR_EL12       = 0xed10, // 11  101  1010  0010  000
+    AMAIR_EL12      = 0xed18, // 11  101  1010  0011  000
+    VBAR_EL12       = 0xee00, // 11  101  1100  0000  000
+    CONTEXTIDR_EL12 = 0xee81, // 11  101  1101  0000  001
+    CNTKCTL_EL12    = 0xef08, // 11  101  1110  0001  000
+    CNTP_TVAL_EL02  = 0xef10, // 11  101  1110  0010  000
+    CNTP_CTL_EL02   = 0xef11, // 11  101  1110  0010  001
+    CNTP_CVAL_EL02  = 0xef12, // 11  101  1110  0010  010
+    CNTV_TVAL_EL02  = 0xef18, // 11  101  1110  0011  000
+    CNTV_CTL_EL02   = 0xef19, // 11  101  1110  0011  001
+    CNTV_CVAL_EL02  = 0xef1a, // 11  101  1110  0011  010
+    SPSR_EL12       = 0xea00, // 11  101  0100  0000  000
+    ELR_EL12        = 0xea01, // 11  101  0100  0000  001
+
+    // Cyclone specific system registers
+    CPM_IOACC_CTL_EL3 = 0xff90,
  };

  // Note that these do not inherit from AArch64NamedImmMapper. This class is
@@ -1134,26 +1198,25 @@
  // burdening the common AArch64NamedImmMapper with abstractions only needed in
  // this one case.
  struct SysRegMapper {
-   static const AArch64NamedImmMapper::Mapping SysRegPairs[];
-   static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[];
+   static const AArch64NamedImmMapper::Mapping SysRegMappings[];

-   const AArch64NamedImmMapper::Mapping *InstPairs;
-   size_t NumInstPairs;
-   uint64_t FeatureBits;
+   const AArch64NamedImmMapper::Mapping *InstMappings;
+   size_t NumInstMappings;

-   SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { }
-   uint32_t fromString(StringRef Name, bool &Valid) const;
-   std::string toString(uint32_t Bits) const;
+   SysRegMapper() { }
+   uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
+                       bool &Valid) const;
+   std::string toString(uint32_t Bits, const FeatureBitset& FeatureBits) const;
  };

  struct MSRMapper : SysRegMapper {
-   static const AArch64NamedImmMapper::Mapping MSRPairs[];
-   MSRMapper(uint64_t FeatureBits);
+   static const AArch64NamedImmMapper::Mapping MSRMappings[];
+   MSRMapper();
  };

  struct MRSMapper : SysRegMapper {
-   static const AArch64NamedImmMapper::Mapping MRSPairs[];
-   MRSMapper(uint64_t FeatureBits);
+   static const AArch64NamedImmMapper::Mapping MRSMappings[];
+   MRSMapper();
  };

  uint32_t ParseGenericRegister(StringRef Name, bool &Valid);

@@ -1197,7 +1260,7 @@ namespace AArch64TLBI {
  };

  struct TLBIMapper : AArch64NamedImmMapper {
-   const static Mapping TLBIPairs[];
+   const static Mapping TLBIMappings[];
    TLBIMapper();
  };
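The trailing comments on the enum values above spell out the five MSR/MRS operand fields (op0 op1 CRn CRm op2), which each 16-bit value packs as op0[15:14] op1[13:11] CRn[10:7] CRm[6:3] op2[2:0]. A small sketch of that packing, inferred from the comment columns rather than a helper this patch adds:

#include <cstdint>

constexpr uint32_t encodeSysReg(uint32_t Op0, uint32_t Op1, uint32_t CRn,
                                uint32_t CRm, uint32_t Op2) {
  // Field layout matches the "op0 op1 CRn CRm op2" columns in the comments.
  return (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
}

// Matches PAN's listed encoding: 11 000 0100 0010 011 -> 0xc213.
static_assert(encodeSysReg(3, 0, 4, 2, 3) == 0xc213, "PAN encoding");
// Matches LORSA_EL1's listed encoding: 11 000 1010 0100 000 -> 0xc520.
static_assert(encodeSysReg(3, 0, 10, 4, 0) == 0xc520, "LORSA_EL1 encoding");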